aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2012-11-27 11:21:28 -0800
committerDerek Schuff <dschuff@chromium.org>2012-11-27 11:22:07 -0800
commit3c4392af7177f4bd64bdc8659de729b9e65716e8 (patch)
treee38ea5f509f28448725fc257c7f0276eac4f647a
parent3b46d602e10074ce1d54b49a3c5ec9ed708425a6 (diff)
parent8d20b5f9ff609e70fae5c865931ab0f29e639d9c (diff)
Merge commit '8d20b5f9ff609e70fae5c865931ab0f29e639d9c'
Conflicts: lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp lib/Target/Mips/MipsISelDAGToDAG.cpp lib/Target/Mips/MipsInstrFPU.td lib/Target/Mips/MipsSubtarget.cpp lib/Target/Mips/MipsSubtarget.h lib/Target/X86/X86MCInstLower.cpp tools/Makefile tools/llc/llc.cpp
-rw-r--r--CMakeLists.txt3
-rw-r--r--CODE_OWNERS.TXT85
-rw-r--r--CREDITS.TXT10
-rw-r--r--LICENSE.TXT1
-rw-r--r--autoconf/configure.ac16
-rw-r--r--bindings/python/llvm/common.py50
-rw-r--r--cmake/modules/HandleLLVMOptions.cmake11
-rwxr-xr-xconfigure40
-rw-r--r--docs/BitCodeFormat.rst4
-rw-r--r--docs/CodeGenerator.rst24
-rw-r--r--docs/CodingStandards.rst43
-rw-r--r--docs/CommandGuide/FileCheck.rst163
-rw-r--r--docs/CommandGuide/lit.rst20
-rw-r--r--docs/CompilerWriterInfo.rst2
-rw-r--r--docs/DeveloperPolicy.rst4
-rw-r--r--docs/GCCFEBuildInstrs.html279
-rw-r--r--docs/GettingStarted.rst39
-rw-r--r--[-rwxr-xr-x]docs/HowToUseInstrMappings.rst4
-rw-r--r--docs/LLVMBuild.html368
-rw-r--r--docs/LLVMBuild.rst325
-rw-r--r--docs/LangRef.html341
-rw-r--r--docs/MakefileGuide.rst2
-rw-r--r--docs/Passes.html24
-rw-r--r--docs/Projects.rst6
-rw-r--r--docs/ReleaseNotes.html63
-rw-r--r--docs/SourceLevelDebugging.html2858
-rw-r--r--docs/SourceLevelDebugging.rst2285
-rw-r--r--docs/SystemLibrary.html316
-rw-r--r--docs/SystemLibrary.rst250
-rw-r--r--docs/TableGenFundamentals.rst31
-rw-r--r--docs/TestSuiteMakefileGuide.html351
-rw-r--r--docs/TestSuiteMakefileGuide.rst279
-rw-r--r--docs/TestingGuide.html916
-rw-r--r--docs/TestingGuide.rst529
-rw-r--r--docs/conf.py4
-rw-r--r--docs/development_process.rst3
-rw-r--r--docs/subsystems.rst12
-rw-r--r--docs/userguides.rst3
-rw-r--r--examples/ExceptionDemo/ExceptionDemo.cpp5
-rw-r--r--include/llvm-c/lto.h7
-rw-r--r--include/llvm/ADT/MapVector.h12
-rw-r--r--include/llvm/ADT/STLExtras.h6
-rw-r--r--include/llvm/ADT/SmallVector.h2
-rw-r--r--include/llvm/ADT/Triple.h4
-rw-r--r--include/llvm/Analysis/DependenceAnalysis.h28
-rw-r--r--include/llvm/Analysis/InlineCost.h13
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h12
-rw-r--r--include/llvm/Attributes.h28
-rw-r--r--include/llvm/Bitcode/Archive.h6
-rw-r--r--include/llvm/Bitcode/BitCodes.h27
-rw-r--r--include/llvm/Bitcode/BitstreamReader.h82
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h28
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h16
-rw-r--r--include/llvm/CallingConv.h4
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h6
-rw-r--r--include/llvm/CodeGen/DFAPacketizer.h2
-rw-r--r--include/llvm/CodeGen/MachineInstrBundle.h5
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h29
-rw-r--r--include/llvm/CodeGen/RegisterScavenging.h8
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h60
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h19
-rw-r--r--include/llvm/Constant.h13
-rw-r--r--include/llvm/Constants.h150
-rw-r--r--include/llvm/DataLayout.h21
-rw-r--r--include/llvm/DebugInfo.h90
-rw-r--r--include/llvm/DebugInfo/DIContext.h11
-rw-r--r--include/llvm/ExecutionEngine/NaClJITMemoryManager.h5
-rw-r--r--include/llvm/ExecutionEngine/OProfileWrapper.h8
-rw-r--r--include/llvm/ExecutionEngine/ObjectBuffer.h160
-rw-r--r--include/llvm/ExecutionEngine/ObjectImage.h122
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyld.h11
-rw-r--r--include/llvm/Instruction.h62
-rw-r--r--include/llvm/Intrinsics.td5
-rw-r--r--include/llvm/IntrinsicsCellSPU.td242
-rw-r--r--include/llvm/MC/MCDwarf.h120
-rw-r--r--include/llvm/MC/MCExpr.h6
-rw-r--r--include/llvm/MC/MCInstBuilder.h68
-rw-r--r--include/llvm/MC/MCStreamer.h3
-rw-r--r--include/llvm/Object/Archive.h14
-rw-r--r--include/llvm/Object/ELF.h16
-rw-r--r--include/llvm/Object/RelocVisitor.h19
-rw-r--r--include/llvm/Operator.h8
-rw-r--r--include/llvm/Pass.h10
-rw-r--r--include/llvm/PassManager.h8
-rw-r--r--include/llvm/PassManagers.h14
-rw-r--r--include/llvm/Support/Compiler.h26
-rw-r--r--include/llvm/Support/Dwarf.h29
-rw-r--r--include/llvm/Support/ELF.h4
-rw-r--r--include/llvm/Support/GetElementPtrTypeIterator.h8
-rw-r--r--include/llvm/Support/YAMLParser.h4
-rw-r--r--include/llvm/Support/circular_raw_ostream.h4
-rw-r--r--include/llvm/Target/TargetInstrInfo.h22
-rw-r--r--include/llvm/Target/TargetLibraryInfo.h24
-rw-r--r--include/llvm/Target/TargetLoweringObjectFile.h13
-rw-r--r--include/llvm/Target/TargetSubtargetInfo.h7
-rw-r--r--include/llvm/Transforms/Instrumentation.h4
-rw-r--r--include/llvm/Transforms/Utils/SimplifyLibCalls.h3
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp15
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp212
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp3
-rw-r--r--lib/Analysis/InlineCost.cpp178
-rw-r--r--lib/Analysis/InstructionSimplify.cpp21
-rw-r--r--lib/AsmParser/LLLexer.cpp2
-rw-r--r--lib/AsmParser/LLParser.cpp144
-rw-r--r--lib/AsmParser/LLParser.h26
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp10
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp89
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h50
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp9
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp96
-rw-r--r--lib/Bitcode/Writer/BitcodeWriterPass.cpp4
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp31
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h20
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp67
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp20
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp38
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp70
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp314
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h111
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp20
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h3
-rw-r--r--lib/CodeGen/CallingConvLower.cpp12
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp3
-rw-r--r--lib/CodeGen/LiveInterval.cpp12
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp15
-rw-r--r--lib/CodeGen/MachineCSE.cpp62
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp6
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp2
-rw-r--r--lib/CodeGen/MachineScheduler.cpp269
-rw-r--r--lib/CodeGen/Passes.cpp8
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp14
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp13
-rw-r--r--lib/CodeGen/RegAllocBase.cpp1
-rw-r--r--lib/CodeGen/RegAllocFast.cpp1
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp2
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp234
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp6
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp74
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp53
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp13
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp421
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp9
-rw-r--r--lib/CodeGen/StackColoring.cpp4
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp3
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp85
-rw-r--r--lib/DebugInfo/DIContext.cpp13
-rw-r--r--lib/DebugInfo/DWARFContext.cpp86
-rw-r--r--lib/DebugInfo/DWARFContext.h35
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.cpp49
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.h8
-rw-r--r--lib/DebugInfo/DWARFFormValue.cpp12
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp75
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h6
-rw-r--r--lib/ExecutionEngine/JIT/JITMemoryManager.cpp6
-rw-r--r--lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp3
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp9
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h152
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp16
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp1
-rw-r--r--lib/MC/MCAsmStreamer.cpp22
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp13
-rw-r--r--lib/MC/MCDwarf.cpp171
-rw-r--r--lib/MC/MCExpr.cpp4
-rw-r--r--lib/MC/MCParser/AsmParser.cpp43
-rw-r--r--lib/MC/MCStreamer.cpp107
-rw-r--r--lib/Object/Archive.cpp161
-rw-r--r--lib/Support/FoldingSet.cpp4
-rw-r--r--lib/Support/Triple.cpp20
-rw-r--r--lib/Support/Unix/PathV2.inc7
-rw-r--r--lib/Support/YAMLParser.cpp20
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp640
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h2
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp168
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp65
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td10
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td15
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td13
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp15
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h5
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp46
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp38
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h1
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp147
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt30
-rw-r--r--lib/Target/CellSPU/CellSDKIntrinsics.td449
-rw-r--r--lib/Target/CellSPU/LLVMBuild.txt32
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp43
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h30
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp94
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h38
-rw-r--r--lib/Target/CellSPU/Makefile20
-rw-r--r--lib/Target/CellSPU/README.txt106
-rw-r--r--lib/Target/CellSPU/SPU.h31
-rw-r--r--lib/Target/CellSPU/SPU.td66
-rw-r--r--lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td408
-rw-r--r--lib/Target/CellSPU/SPUAsmPrinter.cpp333
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td53
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.cpp256
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.h80
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.cpp135
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.h37
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp1192
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp3266
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h178
-rw-r--r--lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--lib/Target/CellSPU/SPUInstrFormats.td320
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp449
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h84
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td4484
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.cpp14
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.h50
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--lib/Target/CellSPU/SPUNodes.td159
-rw-r--r--lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--lib/Target/CellSPU/SPUOperands.td664
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp357
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h106
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td183
-rw-r--r--lib/Target/CellSPU/SPURegisterNames.h19
-rw-r--r--lib/Target/CellSPU/SPUSchedule.td59
-rw-r--r--lib/Target/CellSPU/SPUSelectionDAGInfo.cpp23
-rw-r--r--lib/Target/CellSPU/SPUSelectionDAGInfo.h31
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.cpp65
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.h97
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp94
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h96
-rw-r--r--lib/Target/CellSPU/TargetInfo/CMakeLists.txt7
-rw-r--r--lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp20
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td65
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp18
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td193
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td176
-rw-r--r--lib/Target/Hexagon/HexagonOperands.td (renamed from lib/Target/Hexagon/HexagonImmediates.td)429
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h63
-rw-r--r--lib/Target/LLVMBuild.txt2
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td3
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp91
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h1
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h6
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp10
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h8
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp12
-rw-r--r--lib/Target/Mips/Mips.td7
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td2
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp51
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp4
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp281
-rw-r--r--lib/Target/Mips/MipsISelLowering.h2
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td6
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp8
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp4
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp4
-rw-r--r--lib/Target/Mips/MipsSubtarget.h6
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp76
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp58
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp36
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp20
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp140
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp25
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td16
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td10
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td41
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td24
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp3
-rw-r--r--lib/Target/TargetLibraryInfo.cpp47
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp24
-rw-r--r--lib/Target/TargetSubtargetInfo.cpp4
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp47
-rw-r--r--lib/Target/X86/X86.h6
-rw-r--r--lib/Target/X86/X86CallingConv.td95
-rw-r--r--lib/Target/X86/X86FastISel.cpp8
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp8
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp3
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp199
-rw-r--r--lib/Target/X86/X86InstrFMA.td56
-rw-r--r--lib/Target/X86/X86InstrFormats.td2
-rw-r--r--lib/Target/X86/X86InstrInfo.td4
-rw-r--r--lib/Target/X86/X86InstrSSE.td52
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp69
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp54
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp1
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp8
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h6
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp22
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp8
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp6
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp9
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp54
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp23
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp21
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp25
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp8
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp204
-rw-r--r--lib/Transforms/Instrumentation/BlackList.cpp18
-rw-r--r--lib/Transforms/Instrumentation/BlackList.h1
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp7
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp10
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp30
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp14
-rw-r--r--lib/Transforms/Scalar/NaClCcRewrite.cpp2
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp116
-rw-r--r--lib/Transforms/Scalar/SROA.cpp381
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp469
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp43
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp23
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp17
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp495
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp124
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp559
-rw-r--r--lib/VMCore/AsmWriter.cpp4
-rw-r--r--lib/VMCore/Attributes.cpp100
-rw-r--r--lib/VMCore/AttributesImpl.h24
-rw-r--r--lib/VMCore/Constants.cpp131
-rw-r--r--lib/VMCore/DataLayout.cpp42
-rw-r--r--lib/VMCore/DebugInfo.cpp28
-rw-r--r--lib/VMCore/Instruction.cpp4
-rw-r--r--lib/VMCore/Instructions.cpp11
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp9
-rw-r--r--lib/VMCore/LLVMContextImpl.h11
-rw-r--r--lib/VMCore/PassManager.cpp82
-rw-r--r--lib/VMCore/Type.cpp28
-rw-r--r--lib/VMCore/Verifier.cpp41
-rw-r--r--projects/sample/Makefile.llvm.config.in6
-rw-r--r--projects/sample/autoconf/configure.ac26
-rwxr-xr-xprojects/sample/configure56
-rw-r--r--test/Analysis/BasicAA/phi-spec-order.ll71
-rw-r--r--test/Analysis/DependenceAnalysis/Banerjee.ll152
-rw-r--r--test/Analysis/DependenceAnalysis/Coupled.ll329
-rw-r--r--test/Analysis/DependenceAnalysis/ExactRDIV.ll402
-rw-r--r--test/Analysis/DependenceAnalysis/ExactSIV.ll252
-rw-r--r--test/Analysis/DependenceAnalysis/GCD.ll158
-rw-r--r--test/Analysis/DependenceAnalysis/Preliminary.ll644
-rw-r--r--test/Analysis/DependenceAnalysis/Propagating.ll302
-rw-r--r--test/Analysis/DependenceAnalysis/Separability.ll188
-rw-r--r--test/Analysis/DependenceAnalysis/StrongSIV.ll334
-rw-r--r--test/Analysis/DependenceAnalysis/SymbolicRDIV.ll357
-rw-r--r--test/Analysis/DependenceAnalysis/SymbolicSIV.ll271
-rw-r--r--test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll186
-rw-r--r--test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll168
-rw-r--r--test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll168
-rw-r--r--test/Analysis/DependenceAnalysis/ZIV.ll39
-rw-r--r--test/Assembler/getelementptr.ll19
-rw-r--r--test/Assembler/getelementptr_vec_idx1.ll10
-rw-r--r--test/Assembler/getelementptr_vec_idx2.ll10
-rw-r--r--test/Assembler/getelementptr_vec_idx3.ll10
-rw-r--r--test/Assembler/getelementptr_vec_struct.ll10
-rw-r--r--test/Assembler/global-addrspace-forwardref.ll8
-rw-r--r--test/CMakeLists.txt20
-rw-r--r--test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll2
-rw-r--r--test/CodeGen/ARM/2012-11-14-subs_carry.ll31
-rw-r--r--test/CodeGen/ARM/arm-ttype-target2.ll44
-rw-r--r--test/CodeGen/ARM/atomic-64bit.ll82
-rw-r--r--test/CodeGen/ARM/coalesce-subregs.ll2
-rw-r--r--test/CodeGen/ARM/domain-conv-vmovs.ll20
-rw-r--r--test/CodeGen/ARM/ehabi-filters.ll77
-rw-r--r--test/CodeGen/ARM/ehabi-no-landingpad.ll18
-rw-r--r--test/CodeGen/ARM/fabs-neon.ll17
-rw-r--r--test/CodeGen/ARM/fast-isel-GEP-coalesce.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-br-const.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-crash.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-crash2.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-deadcode.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-fold.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-frameaddr.ll8
-rw-r--r--test/CodeGen/ARM/neon_fpconv.ll17
-rw-r--r--test/CodeGen/CellSPU/2009-01-01-BrCond.ll31
-rw-r--r--test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll28
-rw-r--r--test/CodeGen/CellSPU/and_ops.ll282
-rw-r--r--test/CodeGen/CellSPU/arg_ret.ll34
-rw-r--r--test/CodeGen/CellSPU/bigstack.ll17
-rw-r--r--test/CodeGen/CellSPU/bss.ll11
-rw-r--r--test/CodeGen/CellSPU/call.ll49
-rw-r--r--test/CodeGen/CellSPU/crash.ll8
-rw-r--r--test/CodeGen/CellSPU/ctpop.ll30
-rw-r--r--test/CodeGen/CellSPU/div_ops.ll22
-rw-r--r--test/CodeGen/CellSPU/dp_farith.ll102
-rw-r--r--test/CodeGen/CellSPU/eqv.ll152
-rw-r--r--test/CodeGen/CellSPU/extract_elt.ll277
-rw-r--r--test/CodeGen/CellSPU/fcmp32.ll36
-rw-r--r--test/CodeGen/CellSPU/fcmp64.ll7
-rw-r--r--test/CodeGen/CellSPU/fdiv.ll22
-rw-r--r--test/CodeGen/CellSPU/fneg-fabs.ll42
-rw-r--r--test/CodeGen/CellSPU/i64ops.ll57
-rw-r--r--test/CodeGen/CellSPU/i8ops.ll25
-rw-r--r--test/CodeGen/CellSPU/icmp16.ll574
-rw-r--r--test/CodeGen/CellSPU/icmp32.ll575
-rw-r--r--test/CodeGen/CellSPU/icmp64.ll146
-rw-r--r--test/CodeGen/CellSPU/icmp8.ll446
-rw-r--r--test/CodeGen/CellSPU/immed16.ll40
-rw-r--r--test/CodeGen/CellSPU/immed32.ll83
-rw-r--r--test/CodeGen/CellSPU/immed64.ll95
-rw-r--r--test/CodeGen/CellSPU/int2fp.ll41
-rw-r--r--test/CodeGen/CellSPU/intrinsics_branch.ll150
-rw-r--r--test/CodeGen/CellSPU/intrinsics_float.ll94
-rw-r--r--test/CodeGen/CellSPU/intrinsics_logical.ll49
-rw-r--r--test/CodeGen/CellSPU/jumptable.ll21
-rw-r--r--test/CodeGen/CellSPU/loads.ll59
-rw-r--r--test/CodeGen/CellSPU/mul-with-overflow.ll15
-rw-r--r--test/CodeGen/CellSPU/mul_ops.ll88
-rw-r--r--test/CodeGen/CellSPU/nand.ll125
-rw-r--r--test/CodeGen/CellSPU/or_ops.ll278
-rw-r--r--test/CodeGen/CellSPU/private.ll19
-rw-r--r--test/CodeGen/CellSPU/rotate_ops.ll172
-rw-r--r--test/CodeGen/CellSPU/select_bits.ll572
-rw-r--r--test/CodeGen/CellSPU/sext128.ll71
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll348
-rw-r--r--test/CodeGen/CellSPU/shuffles.ll69
-rw-r--r--test/CodeGen/CellSPU/sp_farith.ll90
-rw-r--r--test/CodeGen/CellSPU/stores.ll181
-rw-r--r--test/CodeGen/CellSPU/storestruct.ll13
-rw-r--r--test/CodeGen/CellSPU/struct_1.ll147
-rw-r--r--test/CodeGen/CellSPU/sub_ops.ll26
-rw-r--r--test/CodeGen/CellSPU/trunc.ll94
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/README.txt5
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/i32operations.c69
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/i64operations.c673
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/i64operations.h43
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg1
-rw-r--r--test/CodeGen/CellSPU/useful-harnesses/vecoperations.c179
-rw-r--r--test/CodeGen/CellSPU/v2f32.ll78
-rw-r--r--test/CodeGen/CellSPU/v2i32.ll61
-rw-r--r--test/CodeGen/CellSPU/vec_const.ll154
-rw-r--r--test/CodeGen/CellSPU/vecinsert.ll131
-rw-r--r--test/CodeGen/Generic/vector.ll5
-rw-r--r--test/CodeGen/Hexagon/postinc-load.ll29
-rw-r--r--test/CodeGen/MSP430/byval.ll26
-rw-r--r--test/CodeGen/MSP430/vararg.ll50
-rw-r--r--test/CodeGen/Mips/addressing-mode.ll41
-rw-r--r--test/CodeGen/Mips/biggot.ll50
-rw-r--r--test/CodeGen/Mips/brdelayslot.ll32
-rw-r--r--test/CodeGen/NVPTX/global-ordering.ll20
-rw-r--r--test/CodeGen/NVPTX/pr13291-i1-store.ll26
-rw-r--r--test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll2
-rw-r--r--test/CodeGen/PowerPC/2012-10-11-dynalloc.ll18
-rw-r--r--test/CodeGen/PowerPC/2012-11-16-mischedcall.ll33
-rw-r--r--test/CodeGen/PowerPC/available-externally.ll56
-rw-r--r--test/CodeGen/PowerPC/i64_fp_round.ll10
-rw-r--r--test/CodeGen/PowerPC/mem_update.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-toc.ll2
-rw-r--r--test/CodeGen/PowerPC/stubs.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_extload.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_rounding.ll172
-rw-r--r--test/CodeGen/X86/2008-10-27-StackRealignment.ll22
-rw-r--r--test/CodeGen/X86/2010-01-08-Atomic64Bug.ll8
-rw-r--r--test/CodeGen/X86/avx-intel-ocl.ll94
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll51
-rw-r--r--test/CodeGen/X86/avx-splat.ll14
-rw-r--r--test/CodeGen/X86/byval2.ll4
-rw-r--r--test/CodeGen/X86/byval3.ll4
-rw-r--r--test/CodeGen/X86/byval4.ll4
-rw-r--r--test/CodeGen/X86/byval5.ll4
-rw-r--r--test/CodeGen/X86/dynamic-allocas-VLAs.ll12
-rw-r--r--test/CodeGen/X86/hipe-cc.ll77
-rw-r--r--test/CodeGen/X86/hipe-cc64.ll87
-rw-r--r--test/CodeGen/X86/inline-asm.ll7
-rw-r--r--test/CodeGen/X86/memcpy-2.ll1
-rw-r--r--test/CodeGen/X86/memset-sse-stack-realignment.ll77
-rw-r--r--test/CodeGen/X86/memset.ll25
-rw-r--r--test/CodeGen/X86/memset64-on-x86-32.ll1
-rw-r--r--test/CodeGen/X86/pr14314.ll13
-rw-r--r--test/CodeGen/X86/pr14333.ll12
-rw-r--r--test/CodeGen/X86/vec_floor.ll144
-rw-r--r--test/CodeGen/XCore/aliases.ll32
-rw-r--r--test/ExecutionEngine/MCJIT/lit.local.cfg8
-rw-r--r--test/ExecutionEngine/MCJIT/simpletest-remote.ll12
-rw-r--r--test/ExecutionEngine/MCJIT/stubs-remote.ll36
-rw-r--r--test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll89
-rw-r--r--test/ExecutionEngine/MCJIT/test-data-align-remote.ll16
-rw-r--r--test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll22
-rw-r--r--test/ExecutionEngine/MCJIT/test-global-ctors.ll21
-rw-r--r--test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll35
-rw-r--r--test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll17
-rw-r--r--test/ExecutionEngine/lit.local.cfg11
-rw-r--r--test/FileCheck/lit.local.cfg1
-rw-r--r--test/FileCheck/next-no-match.txt9
-rw-r--r--test/FileCheck/regex-no-match.txt5
-rw-r--r--test/FileCheck/simple-var-capture.txt12
-rw-r--r--test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll45
-rw-r--r--test/Instrumentation/ThreadSanitizer/atomic.ll50
-rw-r--r--test/JitListener/lit.local.cfg11
-rw-r--r--test/JitListener/test-common-symbols.ll113
-rw-r--r--test/JitListener/test-inline.ll219
-rw-r--r--test/JitListener/test-parameters.ll205
-rw-r--r--test/MC/COFF/weak-symbol-section-specification.ll23
-rw-r--r--test/MC/Disassembler/Mips/mips64.txt134
-rw-r--r--test/MC/Disassembler/Mips/mips64_le.txt134
-rw-r--r--test/MC/Disassembler/Mips/mips64r2.txt182
-rw-r--r--test/MC/Disassembler/Mips/mips64r2_le.txt182
-rw-r--r--test/MC/Disassembler/X86/enhanced.txt8
-rw-r--r--test/MC/ELF/cfi-register.s42
-rw-r--r--test/MC/ELF/cfi-undefined.s41
-rw-r--r--test/MC/ELF/gen-dwarf.s44
-rw-r--r--test/MC/Mips/xgot.ll42
-rw-r--r--test/MC/PowerPC/ppc64-relocs-01.ll2
-rw-r--r--test/MC/PowerPC/ppc64-tls-relocs-01.ll28
-rw-r--r--test/MC/X86/x86_errors.s2
-rwxr-xr-xtest/Object/Inputs/coff_archive.libbin0 -> 41196 bytes
-rw-r--r--test/Object/Inputs/liblong_filenames.abin0 -> 10920 bytes
-rw-r--r--test/Object/Inputs/libsimple_archive.abin0 -> 1596 bytes
-rw-r--r--test/Object/archive-long-index.test40
-rw-r--r--test/Object/coff-archive.test225
-rw-r--r--test/Object/simple-archive.test12
-rw-r--r--test/Other/2008-10-15-MissingSpace.ll8
-rwxr-xr-xtest/Scripts/elf-dump48
-rw-r--r--test/Transforms/BBVectorize/X86/cmp-types.ll16
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec.ll54
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec2.ll85
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec3.ll170
-rw-r--r--test/Transforms/BBVectorize/X86/sh-types.ll25
-rw-r--r--test/Transforms/GlobalOpt/blockaddress.ll20
-rw-r--r--test/Transforms/GlobalOpt/tls.ll53
-rw-r--r--test/Transforms/IndVarSimplify/iv-zext.ll2
-rw-r--r--test/Transforms/Inline/lifetime-no-datalayout.ll23
-rw-r--r--test/Transforms/Inline/lifetime.ll44
-rw-r--r--test/Transforms/InstCombine/abs-1.ll41
-rw-r--r--test/Transforms/InstCombine/align-external.ll2
-rw-r--r--test/Transforms/InstCombine/alloca.ll16
-rw-r--r--test/Transforms/InstCombine/cos-1.ll38
-rw-r--r--test/Transforms/InstCombine/cos-2.ll17
-rw-r--r--test/Transforms/InstCombine/debug-line.ll (renamed from test/Transforms/SimplifyLibCalls/debug-line.ll)2
-rw-r--r--test/Transforms/InstCombine/disable-simplify-libcalls.ll99
-rw-r--r--test/Transforms/InstCombine/double-float-shrink-1.ll (renamed from test/Transforms/SimplifyLibCalls/double-float-shrink.ll)262
-rw-r--r--test/Transforms/InstCombine/double-float-shrink-2.ll80
-rw-r--r--test/Transforms/InstCombine/exp2-1.ll76
-rw-r--r--test/Transforms/InstCombine/exp2-2.ll17
-rw-r--r--test/Transforms/InstCombine/ffs-1.ll134
-rw-r--r--test/Transforms/InstCombine/icmp.ll18
-rw-r--r--test/Transforms/InstCombine/isascii-1.ll32
-rw-r--r--test/Transforms/InstCombine/isdigit-1.ll48
-rw-r--r--test/Transforms/InstCombine/memcmp-1.ll4
-rw-r--r--test/Transforms/InstCombine/memcpy-from-global.ll10
-rw-r--r--test/Transforms/InstCombine/pow-1.ll152
-rw-r--r--test/Transforms/InstCombine/pow-2.ll14
-rw-r--r--test/Transforms/InstCombine/pr12338.ll42
-rw-r--r--test/Transforms/InstCombine/printf-1.ll119
-rw-r--r--test/Transforms/InstCombine/sdiv-1.ll4
-rw-r--r--test/Transforms/InstCombine/toascii-1.ll59
-rw-r--r--test/Transforms/InstCombine/vector_gep1.ll5
-rw-r--r--test/Transforms/InstCombine/xor2.ll15
-rw-r--r--test/Transforms/InstSimplify/compare.ll9
-rw-r--r--test/Transforms/LoopVectorize/gcc-examples.ll7
-rw-r--r--test/Transforms/LoopVectorize/no_int_induction.ll33
-rw-r--r--test/Transforms/Reassociate/crash.ll28
-rw-r--r--test/Transforms/SROA/basictest.ll2
-rw-r--r--test/Transforms/SROA/phi-and-select.ll35
-rw-r--r--test/Transforms/SROA/vector-promotion.ll122
-rw-r--r--test/Transforms/SimplifyLibCalls/FFS.ll45
-rw-r--r--test/Transforms/SimplifyLibCalls/IsDigit.ll21
-rw-r--r--test/Transforms/SimplifyLibCalls/Printf.ll37
-rw-r--r--test/Transforms/SimplifyLibCalls/Puts.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/ToAscii.ll21
-rw-r--r--test/Transforms/SimplifyLibCalls/abs.ll11
-rw-r--r--test/Transforms/SimplifyLibCalls/cos.ll14
-rw-r--r--test/Transforms/SimplifyLibCalls/exp2.ll38
-rw-r--r--test/Transforms/SimplifyLibCalls/floor.ll85
-rw-r--r--test/Transforms/SimplifyLibCalls/iprintf.ll21
-rw-r--r--test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll33
-rw-r--r--test/Transforms/SimplifyLibCalls/pow2.ll37
-rw-r--r--test/lit.cfg2
-rw-r--r--test/lit.site.cfg.in1
-rw-r--r--test/tools/llvm-objdump/disassembly-show-raw.s15
-rw-r--r--test/tools/llvm-objdump/lit.local.cfg (renamed from test/CodeGen/CellSPU/lit.local.cfg)4
-rw-r--r--tools/CMakeLists.txt5
-rw-r--r--tools/LLVMBuild.txt2
-rw-r--r--tools/Makefile5
-rw-r--r--tools/bugpoint/CrashDebugger.cpp2
-rw-r--r--tools/lli/RecordingMemoryManager.cpp16
-rw-r--r--tools/lli/RecordingMemoryManager.h5
-rw-r--r--tools/lli/lli.cpp7
-rw-r--r--tools/llvm-dwarfdump/llvm-dwarfdump.cpp94
-rw-r--r--tools/llvm-extract/llvm-extract.cpp2
-rw-r--r--tools/llvm-jitlistener/CMakeLists.txt20
-rw-r--r--tools/llvm-jitlistener/LLVMBuild.txt (renamed from lib/Target/CellSPU/TargetInfo/LLVMBuild.txt)11
-rw-r--r--tools/llvm-jitlistener/Makefile27
-rw-r--r--tools/llvm-jitlistener/llvm-jitlistener.cpp207
-rw-r--r--tools/llvm-mc/llvm-mc.cpp6
-rw-r--r--tools/llvm-nm/llvm-nm.cpp35
-rw-r--r--tools/llvm-objdump/MachODump.cpp45
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp11
-rw-r--r--tools/llvm-prof/llvm-prof.cpp2
-rw-r--r--tools/llvm-rtdyld/llvm-rtdyld.cpp7
-rw-r--r--tools/llvm-stress/llvm-stress.cpp2
-rw-r--r--tools/llvm-symbolizer/CMakeLists.txt13
-rw-r--r--tools/llvm-symbolizer/Makefile (renamed from lib/Target/CellSPU/TargetInfo/Makefile)16
-rw-r--r--tools/llvm-symbolizer/llvm-symbolizer.cpp323
-rw-r--r--tools/lto/CMakeLists.txt1
-rw-r--r--tools/lto/LTOCodeGenerator.cpp4
-rw-r--r--tools/lto/LTODisassembler.cpp26
-rw-r--r--tools/lto/lto.exports1
-rw-r--r--tools/opt/opt.cpp2
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp5
-rw-r--r--unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp3
-rw-r--r--unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h4
-rw-r--r--unittests/Support/AlignOfTest.cpp12
-rw-r--r--unittests/Support/MemoryTest.cpp712
-rw-r--r--unittests/Support/YAMLParserTest.cpp34
-rw-r--r--unittests/VMCore/CMakeLists.txt1
-rw-r--r--unittests/VMCore/ConstantsTest.cpp106
-rw-r--r--unittests/VMCore/WaymarkTest.cpp54
-rw-r--r--utils/FileCheck/FileCheck.cpp122
-rw-r--r--utils/TableGen/EDEmitter.cpp1
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp2
-rwxr-xr-xutils/UpdateCMakeLists.pl2
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp2
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp2
-rw-r--r--utils/lit/lit/TestRunner.py11
-rwxr-xr-xutils/wciia.py125
631 files changed, 21156 insertions, 35709 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3edc02198..228732d6a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@ set(CMAKE_MODULE_PATH
)
set(LLVM_VERSION_MAJOR 3)
-set(LLVM_VERSION_MINOR 2)
+set(LLVM_VERSION_MINOR 3)
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn")
@@ -75,7 +75,6 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name
set(LLVM_ALL_TARGETS
ARM
- CellSPU
CppBackend
Hexagon
Mips
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index fd7bcda3b7..d056a7f5df 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -8,9 +8,30 @@ beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S).
+N: Joe Abbey
+E: jabbey@arxan.com
+D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
+
+N: Owen Anderson
+E: resistor@mac.com
+D: SelectionDAG (lib/CodeGen/SelectionDAG/*)
+
+N: Rafael Avila de Espindola
+E: rafael.espindola@gmail.com
+D: Gold plugin (tools/gold/*)
+
+N: Chandler Carruth
+E: chandlerc@gmail.com
+E: chandlerc@google.com
+D: Config, ADT, Support, inlining & related passes, SROA/mem2reg & related passes, CMake, library layering
+
N: Evan Cheng
E: evan.cheng@apple.com
-D: Code generator and all targets
+D: ARM target, parts of code generator not covered by someone else
+
+N: Eric Christopher
+E: echristo@gmail.com
+D: Debug Information, autotools/configure/make build, inline assembly
N: Greg Clayton
D: LLDB
@@ -18,22 +39,58 @@ D: LLDB
N: Peter Collingbourne
D: libclc
+N: Anshuman Dasgupta
+E: adasgupt@codeaurora.org
+D: Hexagon Backend
+
+N: Hal Finkel
+E: hfinkel@anl.gov
+D: BBVectorize and the PowerPC target
+
+N: Venkatraman Govindaraju
+E: venkatra@cs.wisc.edu
+D: Sparc Backend (lib/Target/Sparc/*)
+
N: Doug Gregor
-D: Clang Frontend Libraries
+D: All parts of Clang not covered by someone else
N: Tobias Grosser
D: Polly
+N: James Grosbach
+E: grosbach@apple.com
+D: MC layer
+
N: Howard Hinnant
D: libc++
+N: Justin Holewinski
+E: jholewinski@nvidia.com
+D: NVPTX Target (lib/Target/NVPTX/*)
+
+N: Andy Kaylor
+E: andrew.kaylor@intel.com
+D: MCJIT, RuntimeDyld and JIT event listeners
+
+N: Galina Kistanova
+E: gkistanova@gmail.com
+D: LLVM Buildbot
+
N: Anton Korobeynikov
-E: asl@math.spbu.ru
-D: Exception handling, debug information, and Windows codegen
+E: anton@korobeynikov.info
+D: Exception handling, Windows codegen, ARM EABI
+
+N: Benjamin Kramer
+E: benny.kra@gmail.com
+D: DWARF Parser
N: Ted Kremenek
D: Clang Static Analyzer
+N: Sergei Larin
+E: slarin@codeaurora.org
+D: VLIW Instruction Scheduling, Packetization
+
N: Chris Lattner
E: sabre@nondot.org
W: http://nondot.org/~sabre/
@@ -46,6 +103,26 @@ D: Clang LLVM IR generation
N: Jakob Olesen
D: Register allocators and TableGen
+N: Richard Osborne
+E: richard@xmos.com
+D: XCore Backend
+
+N: Chad Rosier
+E: mcrosier@apple.com
+D: MS-inline asm, Fast-Isel, and the compiler driver
+
+N: Nadav Rotem
+E: nrotem@apple.com
+D: X86 Backend, Loop Vectorizer
+
N: Duncan Sands
E: baldrick@free.fr
D: DragonEgg
+
+N: Richard Smith
+E: richard@metafoo.co.uk
+D: Clang Semantic Analysis (tools/clang/lib/Sema/* tools/clang/include/clang/Sema/*)
+
+N: Andrew Trick
+E: atrick@apple.com
+D: Instruction Scheduling
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 0257918258..ca94065b3c 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -60,9 +60,11 @@ D: Loop unrolling with run-time trip counts.
N: Chandler Carruth
E: chandlerc@gmail.com
+E: chandlerc@google.com
D: Hashing algorithms and interfaces
D: Inline cost analysis
D: Machine block placement pass
+D: SROA
N: Casey Carter
E: ccarter@uiuc.edu
@@ -361,8 +363,8 @@ D: ARM fast-isel improvements
D: Performance monitoring
N: Nadav Rotem
-E: nadav.rotem@intel.com
-D: Vector code generation improvements.
+E: nrotem@apple.com
+D: X86 code generation improvements, Loop Vectorizer.
N: Roman Samoilov
E: roman@codedgers.com
@@ -402,6 +404,10 @@ E: rspencer@reidspencer.com
W: http://reidspencer.com/
D: Lots of stuff, see: http://wiki.llvm.org/index.php/User:Reid
+N: Craig Topper
+E: craig.topper@gmail.com
+D: X86 codegen and disassembler improvements. AVX2 support.
+
N: Edwin Torok
E: edwintorok@gmail.com
D: Miscellaneous bug fixes
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 00cf601169..1015b2d894 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -64,7 +64,6 @@ Program Directory
Autoconf llvm/autoconf
llvm/projects/ModuleMaker/autoconf
llvm/projects/sample/autoconf
-CellSPU backend llvm/lib/Target/CellSPU/README.txt
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 13134589af..ec557bf388 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,9 +31,9 @@ dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
-AC_INIT([LLVM],[3.2svn],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.3svn],[http://llvm.org/bugs/])
AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
-AC_DEFINE([LLVM_VERSION_MINOR], [2], [Minor version of the LLVM API])
+AC_DEFINE([LLVM_VERSION_MINOR], [3], [Minor version of the LLVM API])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
@@ -690,9 +690,9 @@ AC_ARG_ENABLE(backtraces,
[Enable embedding backtraces on crash (default is YES)]),,
enableval=default)
case "$enableval" in
- yes) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
- no) AC_SUBST(ENABLE_TIMESTAMPS,[0]) ;;
- default) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+ yes) AC_SUBST(ENABLE_BACKTRACES,[1]) ;;
+ no) AC_SUBST(ENABLE_BACKTRACES,[0]) ;;
+ default) AC_SUBST(ENABLE_BACKTRACES,[1]) ;;
*) AC_MSG_ERROR([Invalid setting for --enable-backtraces. Use "yes" or "no"]) ;;
esac
AC_DEFINE_UNQUOTED([ENABLE_BACKTRACES],$ENABLE_BACKTRACES,
@@ -702,14 +702,14 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
- host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ host, x86, x86_64, sparc, powerpc, arm, mips, hexagon,
xcore, msp430, nvptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -721,7 +721,6 @@ case "$enableval" in
mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
@@ -736,7 +735,6 @@ case "$enableval" in
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
diff --git a/bindings/python/llvm/common.py b/bindings/python/llvm/common.py
index 0c5fcd03d8..17c22b8ef4 100644
--- a/bindings/python/llvm/common.py
+++ b/bindings/python/llvm/common.py
@@ -12,10 +12,14 @@ from ctypes import c_void_p
from ctypes import cdll
import ctypes.util
+import platform
+
+# LLVM_VERSION: sync with PACKAGE_VERSION in autoconf/configure.ac and CMakeLists.txt
+# but leave out the 'svn' suffix.
+LLVM_VERSION = '3.3'
__all__ = [
'c_object_p',
- 'find_library',
'get_library',
]
@@ -87,20 +91,36 @@ class CachedProperty(object):
return value
-def find_library():
- # FIXME should probably have build system define absolute path of shared
- # library at install time.
- for lib in ['LLVM-3.1svn', 'libLLVM-3.1svn', 'LLVM', 'libLLVM']:
- result = ctypes.util.find_library(lib)
- if result:
- return result
-
- return None
-
def get_library():
"""Obtain a reference to the llvm library."""
- lib = find_library()
- if not lib:
- raise Exception('LLVM shared library not found!')
- return cdll.LoadLibrary(lib)
+ # On Linux, ctypes.cdll.LoadLibrary() respects LD_LIBRARY_PATH
+ # while ctypes.util.find_library() doesn't.
+ # See http://docs.python.org/2/library/ctypes.html#finding-shared-libraries
+ #
+ # To make it possible to run the unit tests without installing the LLVM shared
+ # library into a default linker search path, always try ctypes.cdll.LoadLibrary()
+ # with all possible library names first, then try ctypes.util.find_library().
+
+ names = ['LLVM-' + LLVM_VERSION, 'LLVM-' + LLVM_VERSION + 'svn']
+ t = platform.system()
+ if t == 'Darwin':
+ pfx, ext = 'lib', '.dylib'
+ elif t == 'Windows':
+ pfx, ext = '', '.dll'
+ else:
+ pfx, ext = 'lib', '.so'
+
+ for i in names:
+ try:
+ lib = cdll.LoadLibrary(pfx + i + ext)
+ except OSError:
+ pass
+ else:
+ return lib
+
+ for i in names:
+ t = ctypes.util.find_library(i)
+ if t:
+ return cdll.LoadLibrary(t)
+ raise Exception('LLVM shared library not found!')
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index b5f96e8f71..ca07c5c361 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -3,6 +3,7 @@
# selections.
include(AddLLVMDefinitions)
+include(CheckCCompilerFlag)
if( CMAKE_COMPILER_IS_GNUCXX )
set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
@@ -194,9 +195,13 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
if (LLVM_ENABLE_PEDANTIC)
add_llvm_definitions( -pedantic -Wno-long-long )
endif (LLVM_ENABLE_PEDANTIC)
- check_cxx_compiler_flag("-Werror -Wcovered-switch-default" SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
- if( SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG )
- add_llvm_definitions( -Wcovered-switch-default )
+ check_cxx_compiler_flag("-Werror -Wcovered-switch-default" CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
+ if( CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG )
+ set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wcovered-switch-default" )
+ endif()
+ check_c_compiler_flag("-Werror -Wcovered-switch-default" C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
+ if( C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG )
+ set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wcovered-switch-default" )
endif()
endif (LLVM_ENABLE_WARNINGS)
if (LLVM_ENABLE_WERROR)
diff --git a/configure b/configure
index 80b5c18c1c..47c084a3eb 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.2svn.
+# Generated by GNU Autoconf 2.60 for LLVM 3.3svn.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.2svn'
-PACKAGE_STRING='LLVM 3.2svn'
+PACKAGE_VERSION='3.3svn'
+PACKAGE_STRING='LLVM 3.3svn'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/VMCore/Module.cpp"
@@ -704,6 +704,7 @@ ENABLE_PIC
ENABLE_SHARED
ENABLE_EMBED_STDCXX
ENABLE_TIMESTAMPS
+ENABLE_BACKTRACES
TARGETS_TO_BUILD
LLVM_ENUM_TARGETS
LLVM_ENUM_ASM_PRINTERS
@@ -1320,7 +1321,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures LLVM 3.2svn to adapt to many kinds of systems.
+\`configure' configures LLVM 3.3svn to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1386,7 +1387,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of LLVM 3.2svn:";;
+ short | recursive ) echo "Configuration of LLVM 3.3svn:";;
esac
cat <<\_ACEOF
@@ -1426,8 +1427,8 @@ Optional Features:
YES)
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
- x86_64, sparc, powerpc, arm, mips, spu, hexagon,
- xcore, msp430, nvptx, and cpp (default=all)
+ x86_64, sparc, powerpc, arm, mips, hexagon, xcore,
+ msp430, nvptx, and cpp (default=all)
--enable-experimental-targets
Build experimental host targets: disable or
target1,target2,... (default=disable)
@@ -1539,7 +1540,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-LLVM configure 3.2svn
+LLVM configure 3.3svn
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1555,7 +1556,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by LLVM $as_me 3.2svn, which was
+It was created by LLVM $as_me 3.3svn, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1915,7 +1916,7 @@ _ACEOF
cat >>confdefs.h <<\_ACEOF
-#define LLVM_VERSION_MINOR 2
+#define LLVM_VERSION_MINOR 3
_ACEOF
@@ -5395,11 +5396,11 @@ else
fi
case "$enableval" in
- yes) ENABLE_TIMESTAMPS=1
+ yes) ENABLE_BACKTRACES=1
;;
- no) ENABLE_TIMESTAMPS=0
+ no) ENABLE_BACKTRACES=0
;;
- default) ENABLE_TIMESTAMPS=1
+ default) ENABLE_BACKTRACES=1
;;
*) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-backtraces. Use \"yes\" or \"no\"" >&5
echo "$as_me: error: Invalid setting for --enable-backtraces. Use \"yes\" or \"no\"" >&2;}
@@ -5423,7 +5424,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5435,7 +5436,6 @@ case "$enableval" in
mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
@@ -5450,7 +5450,6 @@ case "$enableval" in
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -10320,7 +10319,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10318 "configure"
+#line 10317 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -21578,7 +21577,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.2svn, which was
+This file was extended by LLVM $as_me 3.3svn, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -21631,7 +21630,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.2svn
+LLVM config.status 3.3svn
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -21976,6 +21975,7 @@ ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
+ENABLE_BACKTRACES!$ENABLE_BACKTRACES$ac_delim
TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
@@ -22067,7 +22067,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 94; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index bd26f7b150..333e79b864 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -54,8 +54,8 @@ structure. This structure consists of the following concepts:
* Abbreviations, which specify compression optimizations for the file.
-Note that the `llvm-bcanalyzer <CommandGuide/html/llvm-bcanalyzer.html>`_ tool
-can be used to dump and inspect arbitrary bitstreams, which is very useful for
+Note that the :doc:`llvm-bcanalyzer <CommandGuide/llvm-bcanalyzer>` tool can be
+used to dump and inspect arbitrary bitstreams, which is very useful for
understanding the encoding.
.. _magic number:
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 5fab76ec1a..11174b7bee 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -968,7 +968,8 @@ The ``FADDS`` instruction is a simple binary single-precision add instruction.
To perform this pattern match, the PowerPC backend includes the following
instruction definitions:
-::
+.. code-block:: text
+ :emphasize-lines: 4-5,9
def FMADDS : AForm_1<59, 29,
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
@@ -980,10 +981,10 @@ instruction definitions:
"fadds $FRT, $FRA, $FRB",
[(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
-The portion of the instruction definition in bold indicates the pattern used to
-match the instruction. The DAG operators (like ``fmul``/``fadd``) are defined
-in the ``include/llvm/Target/TargetSelectionDAG.td`` file. " ``F4RC``" is the
-register class of the input and result values.
+The highlighted portion of the instruction definitions indicates the pattern
+used to match the instructions. The DAG operators (like ``fmul``/``fadd``)
+are defined in the ``include/llvm/Target/TargetSelectionDAG.td`` file.
+"``F4RC``" is the register class of the input and result values.
The TableGen DAG instruction selector generator reads the instruction patterns
in the ``.td`` file and automatically builds parts of the pattern matching code
@@ -1762,7 +1763,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<th>Feature</th>`
:raw-html:`<th>ARM</th>`
-:raw-html:`<th>CellSPU</th>`
:raw-html:`<th>Hexagon</th>`
:raw-html:`<th>MBlaze</th>`
:raw-html:`<th>MSP430</th>`
@@ -1777,7 +1777,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_reliable">is generally reliable</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
@@ -1792,7 +1791,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_asmparser">assembly parser</a></td>`
:raw-html:`<td class="no"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
@@ -1807,7 +1805,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_disassembler">disassembler</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
@@ -1822,7 +1819,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_inlineasm">inline asm</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
@@ -1837,7 +1833,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_jit">jit</a></td>`
:raw-html:`<td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
@@ -1852,7 +1847,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_objectwrite">.o&nbsp;file writing</a></td>`
:raw-html:`<td class="no"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
@@ -1867,7 +1861,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_tailcall">tail calls</a></td>`
:raw-html:`<td class="yes"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
@@ -1882,7 +1875,6 @@ Here is the table:
:raw-html:`<tr>`
:raw-html:`<td><a href="#feat_segstacks">segmented stacks</a></td>`
:raw-html:`<td class="no"></td> <!-- ARM -->`
-:raw-html:`<td class="no"></td> <!-- CellSPU -->`
:raw-html:`<td class="no"></td> <!-- Hexagon -->`
:raw-html:`<td class="no"></td> <!-- MBlaze -->`
:raw-html:`<td class="no"></td> <!-- MSP430 -->`
@@ -1991,8 +1983,8 @@ Tail call optimization
Tail call optimization, callee reusing the stack of the caller, is currently
supported on x86/x86-64 and PowerPC. It is performed if:
-* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC
- call convention).
+* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
+ calling convention) or ``cc 11`` (HiPE calling convention).
* The call is a tail call - in tail position (ret immediately follows call and
ret uses value of call or is void).
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index 90835307b1..2b6a6acb1f 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -409,7 +409,8 @@ code.
That said, LLVM does make extensive use of a hand-rolled form of RTTI that use
templates like `isa<>, cast<>, and dyn_cast<> <ProgrammersManual.html#isa>`_.
-This form of RTTI is opt-in and can be added to any class. It is also
+This form of RTTI is opt-in and can be
+:doc:`added to any class <HowToSetUpLLVMStyleRTTI>`. It is also
substantially more efficient than ``dynamic_cast<>``.
.. _static constructor:
@@ -713,8 +714,8 @@ sort of thing is:
.. code-block:: c++
bool FoundFoo = false;
- for (unsigned i = 0, e = BarList.size(); i != e; ++i)
- if (BarList[i]->isFoo()) {
+ for (unsigned I = 0, E = BarList.size(); I != E; ++I)
+ if (BarList[I]->isFoo()) {
FoundFoo = true;
break;
}
@@ -732,8 +733,8 @@ code to be structured like this:
/// \returns true if the specified list has an element that is a foo.
static bool containsFoo(const std::vector<Bar*> &List) {
- for (unsigned i = 0, e = List.size(); i != e; ++i)
- if (List[i]->isFoo())
+ for (unsigned I = 0, E = List.size(); I != E; ++I)
+ if (List[I]->isFoo())
return true;
return false;
}
@@ -820,8 +821,8 @@ Here are some examples of good and bad names:
Vehicle MakeVehicle(VehicleType Type) {
VehicleMaker M; // Might be OK if having a short life-span.
- Tire tmp1 = M.makeTire(); // Bad -- 'tmp1' provides no information.
- Light headlight = M.makeLight("head"); // Good -- descriptive.
+ Tire Tmp1 = M.makeTire(); // Bad -- 'Tmp1' provides no information.
+ Light Headlight = M.makeLight("head"); // Good -- descriptive.
...
}
@@ -841,9 +842,9 @@ enforced, and hopefully what to do about it. Here is one complete example:
.. code-block:: c++
- inline Value *getOperand(unsigned i) {
- assert(i < Operands.size() && "getOperand() out of range!");
- return Operands[i];
+ inline Value *getOperand(unsigned I) {
+ assert(I < Operands.size() && "getOperand() out of range!");
+ return Operands[I];
}
Here are more examples:
@@ -1035,7 +1036,7 @@ form has two problems. First it may be less efficient than evaluating it at the
start of the loop. In this case, the cost is probably minor --- a few extra
loads every time through the loop. However, if the base expression is more
complex, then the cost can rise quickly. I've seen loops where the end
-expression was actually something like: "``SomeMap[x]->end()``" and map lookups
+expression was actually something like: "``SomeMap[X]->end()``" and map lookups
really aren't cheap. By writing it in the second form consistently, you
eliminate the issue entirely and don't even have to think about it.
@@ -1111,27 +1112,27 @@ macros. For example, this is good:
.. code-block:: c++
- if (x) ...
- for (i = 0; i != 100; ++i) ...
- while (llvm_rocks) ...
+ if (X) ...
+ for (I = 0; I != 100; ++I) ...
+ while (LLVMRocks) ...
somefunc(42);
assert(3 != 4 && "laws of math are failing me");
- a = foo(42, 92) + bar(x);
+ A = foo(42, 92) + bar(X);
and this is bad:
.. code-block:: c++
- if(x) ...
- for(i = 0; i != 100; ++i) ...
- while(llvm_rocks) ...
+ if(X) ...
+ for(I = 0; I != 100; ++I) ...
+ while(LLVMRocks) ...
somefunc (42);
assert (3 != 4 && "laws of math are failing me");
- a = foo (42, 92) + bar (x);
+ A = foo (42, 92) + bar (X);
The reason for doing this is not completely arbitrary. This style makes control
flow operators stand out more, and makes expressions flow better. The function
@@ -1139,11 +1140,11 @@ call operator binds very tightly as a postfix operator. Putting a space after a
function name (as in the last example) makes it appear that the code might bind
the arguments of the left-hand-side of a binary operator with the argument list
of a function and the name of the right side. More specifically, it is easy to
-misread the "``a``" example as:
+misread the "``A``" example as:
.. code-block:: c++
- a = foo ((42, 92) + bar) (x);
+ A = foo ((42, 92) + bar) (X);
when skimming through the code. By avoiding a space in a function, we avoid
this misinterpretation.
diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst
index 1d7a462bd7..5e145f620f 100644
--- a/docs/CommandGuide/FileCheck.rst
+++ b/docs/CommandGuide/FileCheck.rst
@@ -1,18 +1,14 @@
FileCheck - Flexible pattern matching file verifier
===================================================
-
SYNOPSIS
--------
-
**FileCheck** *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*]
-
DESCRIPTION
-----------
-
**FileCheck** reads two files (one from standard input, and one specified on the
command line) and uses one to verify the other. This behavior is particularly
useful for the testsuite, which wants to verify that the output of some tool
@@ -23,77 +19,61 @@ for matching multiple different inputs in one file in a specific order.
The *match-filename* file specifies the file that contains the patterns to
match. The file to verify is always read from standard input.
-
OPTIONS
-------
-
-
**-help**
Print a summary of command line options.
-
-
**--check-prefix** *prefix*
FileCheck searches the contents of *match-filename* for patterns to match. By
- default, these patterns are prefixed with "CHECK:". If you'd like to use a
+ default, these patterns are prefixed with "``CHECK:``". If you'd like to use a
different prefix (e.g. because the same input file is checking multiple
 different tools or options), the **--check-prefix** argument allows you to specify
a specific prefix to match.
-
-
**--input-file** *filename*
File to check (defaults to stdin).
-
**--strict-whitespace**
By default, FileCheck canonicalizes input horizontal whitespace (spaces and
tabs) which causes it to ignore these differences (a space will match a tab).
- The --strict-whitespace argument disables this behavior.
-
+ The **--strict-whitespace** argument disables this behavior.
**-version**
Show the version number of this program.
-
-
-
EXIT STATUS
-----------
-
If **FileCheck** verifies that the file matches the expected contents, it exits
with 0. Otherwise, if not, or if an error occurs, it will exit with a non-zero
value.
-
TUTORIAL
--------
-
FileCheck is typically used from LLVM regression tests, being invoked on the RUN
line of the test. A simple example of using FileCheck from a RUN line looks
like this:
-
.. code-block:: llvm
; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
-This syntax says to pipe the current file ("%s") into llvm-as, pipe that into
-llc, then pipe the output of llc into FileCheck. This means that FileCheck will
-be verifying its standard input (the llc output) against the filename argument
-specified (the original .ll file specified by "%s"). To see how this works,
-let's look at the rest of the .ll file (after the RUN line):
-
+This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe
+that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``. This
+means that FileCheck will be verifying its standard input (the llc output)
+against the filename argument specified (the original ``.ll`` file specified by
+"``%s``"). To see how this works, let's look at the rest of the ``.ll`` file
+(after the RUN line):
.. code-block:: llvm
@@ -114,32 +94,30 @@ let's look at the rest of the .ll file (after the RUN line):
}
-Here you can see some "CHECK:" lines specified in comments. Now you can see
-how the file is piped into llvm-as, then llc, and the machine code output is
-what we are verifying. FileCheck checks the machine code output to verify that
-it matches what the "CHECK:" lines specify.
+Here you can see some "``CHECK:``" lines specified in comments. Now you can
+see how the file is piped into ``llvm-as``, then ``llc``, and the machine code
+output is what we are verifying. FileCheck checks the machine code output to
+verify that it matches what the "``CHECK:``" lines specify.
-The syntax of the CHECK: lines is very simple: they are fixed strings that
+The syntax of the "``CHECK:``" lines is very simple: they are fixed strings that
must occur in order. FileCheck defaults to ignoring horizontal whitespace
differences (e.g. a space is allowed to match a tab) but otherwise, the contents
-of the CHECK: line is required to match some thing in the test file exactly.
+of the "``CHECK:``" line is required to match something in the test file exactly.
One nice thing about FileCheck (compared to grep) is that it allows merging
test cases together into logical groups. For example, because the test above
-is checking for the "sub1:" and "inc4:" labels, it will not match unless there
-is a "subl" in between those labels. If it existed somewhere else in the file,
-that would not count: "grep subl" matches if subl exists anywhere in the
-file.
+is checking for the "``sub1:``" and "``inc4:``" labels, it will not match
+unless there is a "``subl``" in between those labels. If it existed somewhere
+else in the file, that would not count: "``grep subl``" matches if "``subl``"
+exists anywhere in the file.
The FileCheck -check-prefix option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The FileCheck -check-prefix option allows multiple test configurations to be
+The FileCheck ``-check-prefix`` option allows multiple test configurations to be
driven from one .ll file. This is useful in many circumstances, for example,
testing different architectural variants with llc. Here's a simple example:
-
.. code-block:: llvm
; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
@@ -157,21 +135,17 @@ testing different architectural variants with llc. Here's a simple example:
; X64: pinsrd $1, %edi, %xmm0
}
-
In this case, we're testing that we get the expected code generation with
both 32-bit and 64-bit code generation.
-
The "CHECK-NEXT:" directive
~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
Sometimes you want to match lines and would like to verify that matches
happen on exactly consecutive lines with no other lines in between them. In
-this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If
-you specified a custom check prefix, just use "<PREFIX>-NEXT:". For
-example, something like this works as you'd expect:
-
+this case, you can use "``CHECK:``" and "``CHECK-NEXT:``" directives to specify
+this. If you specified a custom check prefix, just use "``<PREFIX>-NEXT:``".
+For example, something like this works as you'd expect:
.. code-block:: llvm
@@ -193,22 +167,18 @@ example, something like this works as you'd expect:
; CHECK-NEXT: ret
}
-
-CHECK-NEXT: directives reject the input unless there is exactly one newline
-between it an the previous directive. A CHECK-NEXT cannot be the first
-directive in a file.
-
+"``CHECK-NEXT:``" directives reject the input unless there is exactly one
+newline between it and the previous directive. A "``CHECK-NEXT:``" cannot be
+the first directive in a file.
The "CHECK-NOT:" directive
~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The CHECK-NOT: directive is used to verify that a string doesn't occur
+The "``CHECK-NOT:``" directive is used to verify that a string doesn't occur
between two matches (or before the first match, or after the last match). For
example, to verify that a load is removed by a transformation, a test like this
can be used:
-
.. code-block:: llvm
define i8 @coerce_offset0(i32 %V, i32* %P) {
@@ -225,26 +195,22 @@ can be used:
}
-
FileCheck Pattern Matching Syntax
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
-uses of FileCheck, fixed string matching is perfectly sufficient. For some
-things, a more flexible form of matching is desired. To support this, FileCheck
-allows you to specify regular expressions in matching strings, surrounded by
-double braces: **{{yourregex}}**. Because we want to use fixed string
-matching for a majority of what we do, FileCheck has been designed to support
-mixing and matching fixed string matching with regular expressions. This allows
-you to write things like this:
-
+The "``CHECK:``" and "``CHECK-NOT:``" directives both take a pattern to match.
+For most uses of FileCheck, fixed string matching is perfectly sufficient. For
+some things, a more flexible form of matching is desired. To support this,
+FileCheck allows you to specify regular expressions in matching strings,
+surrounded by double braces: ``{{yourregex}}``. Because we want to use fixed
+string matching for a majority of what we do, FileCheck has been designed to
+support mixing and matching fixed string matching with regular expressions.
+This allows you to write things like this:
.. code-block:: llvm
; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}}
-
In this case, any offset from the ESP register will be allowed, and any xmm
register will be allowed.
@@ -252,39 +218,64 @@ Because regular expressions are enclosed with double braces, they are
visually distinct, and you don't need to use escape characters within the double
braces like you would in C. In the rare case that you want to match double
braces explicitly from the input, you can use something ugly like
-**{{[{][{]}}** as your pattern.
-
+``{{[{][{]}}`` as your pattern.
FileCheck Variables
~~~~~~~~~~~~~~~~~~~
-
It is often useful to match a pattern and then verify that it occurs again
later in the file. For codegen tests, this can be useful to allow any register,
but verify that that register is used consistently later. To do this, FileCheck
allows named variables to be defined and substituted into patterns. Here is a
simple example:
-
.. code-block:: llvm
; CHECK: test5:
; CHECK: notw [[REGISTER:%[a-z]+]]
; CHECK: andw {{.*}}[[REGISTER]]
-
-The first check line matches a regex (**%[a-z]+**) and captures it into
-the variable "REGISTER". The second line verifies that whatever is in REGISTER
-occurs later in the file after an "andw". FileCheck variable references are
-always contained in **[[ ]]** pairs, and their names can be formed with the
-regex **[a-zA-Z][a-zA-Z0-9]***. If a colon follows the name, then it is a
-definition of the variable; otherwise, it is a use.
+The first check line matches a regex ``%[a-z]+`` and captures it into the
+variable ``REGISTER``. The second line verifies that whatever is in
+``REGISTER`` occurs later in the file after an "``andw``". FileCheck variable
+references are always contained in ``[[ ]]`` pairs, and their names can be
+formed with the regex ``[a-zA-Z][a-zA-Z0-9]*``. If a colon follows the name,
+then it is a definition of the variable; otherwise, it is a use.
FileCheck variables can be defined multiple times, and uses always get the
-latest value. Note that variables are all read at the start of a "CHECK" line
-and are all defined at the end. This means that if you have something like
-"**CHECK: [[XYZ:.\\*]]x[[XYZ]]**", the check line will read the previous
-value of the XYZ variable and define a new one after the match is performed. If
-you need to do something like this you can probably take advantage of the fact
-that FileCheck is not actually line-oriented when it matches, this allows you to
-define two separate CHECK lines that match on the same line.
+latest value. Note that variables are all read at the start of a "``CHECK``"
+line and are all defined at the end. This means that if you have something
+like "``CHECK: [[XYZ:.*]]x[[XYZ]]``", the check line will read the previous
+value of the ``XYZ`` variable and define a new one after the match is
+performed. If you need to do something like this you can probably take
+advantage of the fact that FileCheck is not actually line-oriented when it
+matches; this allows you to define two separate "``CHECK``" lines that match on
+the same line.
+
+
+FileCheck Expressions
+~~~~~~~~~~~~~~~~~~~~~
+
+
+Sometimes there's a need to verify output which refers to line numbers of the match
+file, e.g. when testing compiler diagnostics. This introduces a certain
+fragility of the match file structure, as CHECK: lines contain absolute line
+numbers in the same file, which have to be updated whenever line numbers change
+due to text addition or deletion.
+
+To support this case, FileCheck allows using ``[[@LINE]]``,
+``[[@LINE+<offset>]]``, ``[[@LINE-<offset>]]`` expressions in patterns. These
+expressions expand to the number of the line where the pattern is located (with an
+optional integer offset).
+
+This way match patterns can be put near the relevant test lines and include
+relative line number references, for example:
+
+.. code-block:: c++
+
+ // CHECK: test.cpp:[[@LINE+4]]:6: error: expected ';' after top level declarator
+ // CHECK-NEXT: {{^int a}}
+ // CHECK-NEXT: {{^ \^}}
+ // CHECK-NEXT: {{^ ;}}
+ int a
+
diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst
index 9e96cd2a4b..8886fe6a45 100644
--- a/docs/CommandGuide/lit.rst
+++ b/docs/CommandGuide/lit.rst
@@ -430,14 +430,14 @@ TEST RUN OUTPUT FORMAT
~~~~~~~~~~~~~~~~~~~~~~
-The b<lit> output for a test run conforms to the following schema, in both short
-and verbose modes (although in short mode no PASS lines will be shown). This
-schema has been chosen to be relatively easy to reliably parse by a machine (for
-example in buildbot log scraping), and for other tools to generate.
+The **lit** output for a test run conforms to the following schema, in both
+short and verbose modes (although in short mode no PASS lines will be shown).
+This schema has been chosen to be relatively easy to reliably parse by a machine
+(for example in buildbot log scraping), and for other tools to generate.
-Each test result is expected to appear on a line that matches:
+Each test result is expected to appear on a line that matches::
-<result code>: <test name> (<progress info>)
+ <result code>: <test name> (<progress info>)
where <result-code> is a standard test result such as PASS, FAIL, XFAIL, XPASS,
UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and
@@ -449,11 +449,11 @@ The <progress info> field can be used to report progress information such as
(1/300) or can be empty, but even when empty the parentheses are required.
Each test result may include additional (multiline) log information in the
-following format.
+following format::
-<log delineator> TEST '(<test name>)' <trailing delineator>
-... log message ...
-<log delineator>
+ <log delineator> TEST '(<test name>)' <trailing delineator>
+ ... log message ...
+ <log delineator>
where <test name> should be the name of a preceding reported test, <log
delineator> is a string of '\*' characters *at least* four characters long (the
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index e41f5f9eec..7504d3c75a 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -87,7 +87,7 @@ Intel - Official manuals and docs
Other x86-specific information
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* `Calling conventions for different C++ compilers and operating systems <http://www.agner.org/assem/calling_conventions.pdf>`_
+* `Calling conventions for different C++ compilers and operating systems <http://www.agner.org/optimize/calling_conventions.pdf>`_
Other relevant lists
--------------------
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index e35e729556..390901289d 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -180,8 +180,8 @@ Developers are required to create test cases for any bugs fixed and any new
features added. Some tips for getting your testcase approved:
* All feature and regression test cases are added to the ``llvm/test``
- directory. The appropriate sub-directory should be selected (see the `Testing
- Guide <TestingGuide.html>`_ for details).
+ directory. The appropriate sub-directory should be selected (see the
+ :doc:`Testing Guide <TestingGuide>` for details).
* Test cases should be written in `LLVM assembly language <LangRef.html>`_
unless the feature or regression being tested requires another language
diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html
deleted file mode 100644
index 0caf9d8618..0000000000
--- a/docs/GCCFEBuildInstrs.html
+++ /dev/null
@@ -1,279 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
- <link rel="stylesheet" href="_static/llvm.css" type="text/css" media="screen">
- <title>Building the LLVM GCC Front-End</title>
-</head>
-<body>
-
-<h1>
- Building the LLVM GCC Front-End
-</h1>
-
-<ol>
- <li><a href="#instructions">Building llvm-gcc from Source</a></li>
- <li><a href="#ada">Building the Ada front-end</a></li>
- <li><a href="#fortran">Building the Fortran front-end</a></li>
- <li><a href="#license">License Information</a></li>
-</ol>
-
-<div class="doc_author">
- <p>Written by the LLVM Team</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="instructions">Building llvm-gcc from Source</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section describes how to acquire and build llvm-gcc 4.2, which is based
-on the GCC 4.2.1 front-end. Supported languages are Ada, C, C++, Fortran,
-Objective-C and Objective-C++. Note that the instructions for building these
-front-ends are completely different (and much easier!) than those for building
-llvm-gcc3 in the past.</p>
-
-<ol>
- <li><p>Retrieve the appropriate llvm-gcc-4.2-<i>version</i>.source.tar.gz
- archive from the <a href="http://llvm.org/releases/">LLVM web
- site</a>.</p>
-
- <p>It is also possible to download the sources of the llvm-gcc front end
- from a read-only mirror using subversion. To check out the 4.2 code
- for first time use:</p>
-
-<div class="doc_code">
-<pre>
-svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk <i>dst-directory</i>
-</pre>
-</div>
-
- <p>After that, the code can be be updated in the destination directory
- using:</p>
-
-<div class="doc_code">
-<pre>svn update</pre>
-</div>
-
- <p>The mirror is brought up to date every evening.</p></li>
-
- <li>Follow the directions in the top-level <tt>README.LLVM</tt> file for
- up-to-date instructions on how to build llvm-gcc. See below for building
- with support for Ada or Fortran.
-</ol>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ada">Building the Ada front-end</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>Building with support for Ada amounts to following the directions in the
-top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
-<tt>EXTRALANGS=,ada</tt></p>
-
-<p>There are some complications however:</p>
-
-<ol>
- <li><p>The only platform for which the Ada front-end is known to build is
- 32 bit intel x86 running linux. It is unlikely to build for other
- systems without some work.</p></li>
- <li><p>The build requires having a compiler that supports Ada, C and C++.
- The Ada front-end is written in Ada so an Ada compiler is needed to
- build it. Compilers known to work with the
- <a href="http://llvm.org/releases/download.html">LLVM 2.7 release</a>
- are <a href="http://gcc.gnu.org/releases.html">gcc-4.2</a> and the
- 2005, 2006 and 2007 versions of the
- <a href="http://libre.adacore.com/">GNAT GPL Edition</a>.
- <b>GNAT GPL 2008, gcc-4.3 and later will not work</b>.
- The LLVM parts of llvm-gcc are written in C++ so a C++ compiler is
- needed to build them. The rest of gcc is written in C.
- Some linux distributions provide a version of gcc that supports all
- three languages (the Ada part often comes as an add-on package to
- the rest of gcc). Otherwise it is possible to combine two versions
- of gcc, one that supports Ada and C (such as the
- <a href="http://libre.adacore.com/">2007 GNAT GPL Edition</a>)
- and another which supports C++, see below.</p></li>
- <li><p>Because the Ada front-end is experimental, it is wise to build the
- compiler with checking enabled. This causes it to run much slower, but
- helps catch mistakes in the compiler (please report any problems using
- <a href="http://llvm.org/bugs/">LLVM bugzilla</a>).</p></li>
- <li><p>The Ada front-end <a href="http://llvm.org/PR2007">fails to
- bootstrap</a>, due to lack of LLVM support for
- <tt>setjmp</tt>/<tt>longjmp</tt> style exception handling (used
- internally by the compiler), so you must specify
- <tt>--disable-bootstrap</tt>.</p></li>
-</ol>
-
-<p>Supposing appropriate compilers are available, llvm-gcc with Ada support can
- be built on an x86-32 linux box using the following recipe:</p>
-
-<ol>
- <li><p>Download the <a href="http://llvm.org/releases/download.html">LLVM source</a>
- and unpack it:</p>
-
-<pre class="doc_code">
-wget http://llvm.org/releases/2.7/llvm-2.7.tgz
-tar xzf llvm-2.7.tgz
-mv llvm-2.7 llvm
-</pre>
-
- <p>or <a href="GettingStarted.html#checkout">check out the
- latest version from subversion</a>:</p>
-
-<pre class="doc_code">svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm</pre>
-
- </li>
-
- <li><p>Download the
- <a href="http://llvm.org/releases/download.html">llvm-gcc-4.2 source</a>
- and unpack it:</p>
-
-<pre class="doc_code">
-wget http://llvm.org/releases/2.7/llvm-gcc-4.2-2.7.source.tgz
-tar xzf llvm-gcc-4.2-2.7.source.tgz
-mv llvm-gcc-4.2-2.7.source llvm-gcc-4.2
-</pre>
-
- <p>or <a href="GettingStarted.html#checkout">check out the
- latest version from subversion</a>:</p>
-
-<pre class="doc_code">
-svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk llvm-gcc-4.2
-</pre>
- </li>
-
- <li><p>Make a build directory <tt>llvm-objects</tt> for llvm and make it the
- current directory:</p>
-
-<pre class="doc_code">
-mkdir llvm-objects
-cd llvm-objects
-</pre>
- </li>
-
- <li><p>Configure LLVM (here it is configured to install into <tt>/usr/local</tt>):</p>
-
-<pre class="doc_code">
-../llvm/configure --prefix=<b>/usr/local</b> --enable-optimized --enable-assertions
-</pre>
-
- <p>If you have a multi-compiler setup and the C++ compiler is not the
- default, then you can configure like this:</p>
-
-<pre class="doc_code">
-CXX=<b>PATH_TO_C++_COMPILER</b> ../llvm/configure --prefix=<b>/usr/local</b> --enable-optimized --enable-assertions
-</pre>
-
- <p>To compile without checking (not recommended), replace
- <tt>--enable-assertions</tt> with <tt>--disable-assertions</tt>.</p>
-
- </li>
-
- <li><p>Build LLVM:</p>
-
-<pre class="doc_code">
-make
-</pre>
- </li>
-
- <li><p>Install LLVM (optional):</p>
-
-<pre class="doc_code">
-make install
-</pre>
- </li>
-
- <li><p>Make a build directory <tt>llvm-gcc-4.2-objects</tt> for llvm-gcc and make it the
- current directory:</p>
-
-<pre class="doc_code">
-cd ..
-mkdir llvm-gcc-4.2-objects
-cd llvm-gcc-4.2-objects
-</pre>
- </li>
-
- <li><p>Configure llvm-gcc (here it is configured to install into <tt>/usr/local</tt>).
- The <tt>--enable-checking</tt> flag turns on sanity checks inside the compiler.
- To turn off these checks (not recommended), replace <tt>--enable-checking</tt>
- with <tt>--disable-checking</tt>.
- Additional languages can be appended to the <tt>--enable-languages</tt> switch,
- for example <tt>--enable-languages=ada,c,c++</tt>.</p>
-
-<pre class="doc_code">
-../llvm-gcc-4.2/configure --prefix=<b>/usr/local</b> --enable-languages=ada,c \
- --enable-checking --enable-llvm=$PWD/../llvm-objects \
- --disable-bootstrap --disable-multilib
-</pre>
-
- <p>If you have a multi-compiler setup, then you can configure like this:</p>
-
-<pre class="doc_code">
-export CC=<b>PATH_TO_C_AND_ADA_COMPILER</b>
-export CXX=<b>PATH_TO_C++_COMPILER</b>
-../llvm-gcc-4.2/configure --prefix=<b>/usr/local</b> --enable-languages=ada,c \
- --enable-checking --enable-llvm=$PWD/../llvm-objects \
- --disable-bootstrap --disable-multilib
-</pre>
- </li>
-
- <li><p>Build and install the compiler:</p>
-
-<pre class="doc_code">
-make
-make install
-</pre>
- </li>
-</ol>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="fortran">Building the Fortran front-end</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>To build with support for Fortran, follow the directions in the top-level
-<tt>README.LLVM</tt> file, adding ",fortran" to EXTRALANGS, for example:</p>
-
-<pre class="doc_code">
-EXTRALANGS=,fortran
-</pre>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="license">License Information</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>
-The LLVM GCC frontend is licensed to you under the GNU General Public License
-and the GNU Lesser General Public License. Please see the files COPYING and
-COPYING.LIB for more details.
-</p>
-
-<p>
-More information is <a href="FAQ.html#license">available in the FAQ</a>.
-</p>
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index 68768921f6..f17313506c 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -583,7 +583,7 @@ git-imap-send. Here is an example to generate the patchset in Gmail's [Drafts].
Then, your .git/config should have [imap] sections.
-.. code-block:: bash
+.. code-block:: ini
[imap]
host = imaps://imap.gmail.com
@@ -842,12 +842,39 @@ any subdirectories that it contains. Entering any directory inside the LLVM
object tree and typing ``gmake`` should rebuild anything in or below that
directory that is out of date.
+This does not apply to building the documentation.
+LLVM's (non-Doxygen) documentation is produced with the
+`Sphinx <http://sphinx-doc.org/>`_ documentation generation system.
+There are some HTML documents that have not yet been converted to the new
+system (which uses the easy-to-read and easy-to-write
+`reStructuredText <http://sphinx-doc.org/rest.html>`_ plaintext markup
+language).
+The generated documentation is built in the ``SRC_ROOT/docs`` directory using
+a special makefile.
+For instructions on how to install Sphinx, see
+`Sphinx Introduction for LLVM Developers
+<http://lld.llvm.org/sphinx_intro.html>`_.
+After following the instructions there for installing Sphinx, build the LLVM
+HTML documentation by doing the following:
+
+.. code-block:: bash
+
+ $ cd SRC_ROOT/docs
+ $ make -f Makefile.sphinx
+
+This creates a ``_build/html`` sub-directory with all of the HTML files, not
+just the generated ones.
+This directory corresponds to ``llvm.org/docs``.
+For example, ``_build/html/SphinxQuickstartTemplate.html`` corresponds to
+``llvm.org/docs/SphinxQuickstartTemplate.html``.
+The :doc:`SphinxQuickstartTemplate` is useful when creating a new document.
+
Cross-Compiling LLVM
--------------------
It is possible to cross-compile LLVM itself. That is, you can create LLVM
executables and libraries to be hosted on a platform different from the platform
-where they are build (a Canadian Cross build). To configure a cross-compile,
+where they are built (a Canadian Cross build). To configure a cross-compile,
supply the configure script with ``--build`` and ``--host`` options that are
different. The values of these options must be legal target triples that your
GCC compiler supports.
@@ -1073,8 +1100,8 @@ module that must be checked out (usually to ``projects/test-suite``). This
module contains a comprehensive correctness, performance, and benchmarking test
suite for LLVM. It is a separate Subversion module because not every LLVM user
is interested in downloading or building such a comprehensive test suite. For
-further details on this test suite, please see the `Testing
-Guide <TestingGuide.html>`_ document.
+further details on this test suite, please see the :doc:`Testing Guide
+<TestingGuide>` document.
.. _tools:
@@ -1250,8 +1277,8 @@ Example with clang
% lli hello.bc
- The second examples shows how to invoke the LLVM JIT, `lli
- <CommandGuide/html/lli.html>`_.
+ The second example shows how to invoke the LLVM JIT, :doc:`lli
+ <CommandGuide/lli>`.
#. Use the ``llvm-dis`` utility to take a look at the LLVM assembly code:
diff --git a/docs/HowToUseInstrMappings.rst b/docs/HowToUseInstrMappings.rst
index b51e74e23c..bf9278e770 100755..100644
--- a/docs/HowToUseInstrMappings.rst
+++ b/docs/HowToUseInstrMappings.rst
@@ -120,7 +120,7 @@ to include relevant information in its definition. For example, consider
following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false)
instructions:
-.. code-block::llvm
+.. code-block:: llvm
def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
"$dst = add($a, $b)",
@@ -141,7 +141,7 @@ In this step, we modify these instructions to include the information
required by the relationship model, <tt>getPredOpcode</tt>, so that they can
be related.
-.. code-block::llvm
+.. code-block:: llvm
def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
"$dst = add($a, $b)",
diff --git a/docs/LLVMBuild.html b/docs/LLVMBuild.html
deleted file mode 100644
index 9e7f8c7657..0000000000
--- a/docs/LLVMBuild.html
+++ /dev/null
@@ -1,368 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>LLVMBuild Documentation</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>LLVMBuild Guide</h1>
-
-<ol>
- <li><a href="#introduction">Introduction</a></li>
- <li><a href="#projectorg">Project Organization</a></li>
- <li><a href="#buildintegration">Build Integration</a></li>
- <li><a href="#componentoverview">Component Overview</a></li>
- <li><a href="#formatreference">Format Reference</a></li>
-</ol>
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>This document describes the <tt>LLVMBuild</tt> organization and files which
- we use to describe parts of the LLVM ecosystem. For description of specific
- LLVMBuild related tools, please see the command guide.</p>
-
- <p>LLVM is designed to be a modular set of libraries which can be flexibly
- mixed together in order to build a variety of tools, like compilers, JITs,
- custom code generators, optimization passes, interpreters, and so on. Related
- projects in the LLVM system like Clang and LLDB also tend to follow this
- philosophy.</p>
-
- <p>In order to support this usage style, LLVM has a fairly strict structure as
- to how the source code and various components are organized. The
- <tt>LLVMBuild.txt</tt> files are the explicit specification of that structure,
- and are used by the build systems and other tools in order to develop the LLVM
- project.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="projectorg">Project Organization</a></h2>
-<!-- *********************************************************************** -->
-
-<!-- FIXME: We should probably have an explicit top level project object. Good
-place to hang project level data, name, etc. Also useful for serving as the
-$ROOT of project trees for things which can be checked out separately. -->
-
-<div>
- <p>The source code for LLVM projects using the LLVMBuild system (LLVM, Clang,
- and LLDB) is organized into <em>components</em>, which define the separate
- pieces of functionality that make up the project. These projects may consist
- of many libraries, associated tools, build tools, or other utility tools (for
- example, testing tools).</p>
-
- <p>For the most part, the project contents are organized around defining one
- main component per each subdirectory. Each such directory contains
- an <tt>LLVMBuild.txt</tt> which contains the component definitions.</p>
-
- <p>The component descriptions for the project as a whole are automatically
- gathered by the LLVMBuild tools. The tools automatically traverse the source
- directory structure to find all of the component description files. NOTE: For
- performance/sanity reasons, we only traverse into subdirectories when the
- parent itself contains an <tt>LLVMBuild.txt</tt> description file.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="buildintegration">Build Integration</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>The LLVMBuild files themselves are just a declarative way to describe the
- project structure. The actual building of the LLVM project is handled by
- another build system (currently we support
- both <a href="MakefileGuide.html">Makefiles</a>
- and <a href="CMake.html">CMake</a>.</p>
-
- <p>The build system implementation will load the relevant contents of the
- LLVMBuild files and use that to drive the actual project build. Typically, the
- build system will only need to load this information at "configure" time, and
- use it to generative native information. Build systems will also handle
- automatically reconfiguring their information when the contents of
- the <i>LLVMBuild.txt</i> files change.</p>
-
- <p>Developers generally are not expected to need to be aware of the details of
- how the LLVMBuild system is integrated into their build. Ideally, LLVM
- developers who are not working on the build system would only ever need to
- modify the contents of the <i>LLVMBuild.txt</i> description files (although we
- have not reached this goal yet).</p>
-
- <p>For more information on the utility tool we provide to help interfacing
- with the build system, please see
- the <a href="CommandGuide/html/llvm-build.html">llvm-build</a>
- documentation.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="componentoverview">Component Overview</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>As mentioned earlier, LLVM projects are organized into
- logical <em>components</em>. Every component is typically grouped into its
- own subdirectory. Generally, a component is organized around a coherent group
- of sources which have some kind of clear API separation from other parts of
- the code.</p>
-
- <p>LLVM primarily uses the following types of components:</p>
- <ul>
- <li><em>Libraries</em> - Library components define a distinct API which can
- be independently linked into LLVM client applications. Libraries typically
- have private and public header files, and may specify a link of required
- libraries that they build on top of.</li>
-
- <li><em>Build Tools</em> - Build tools are applications which are designed
- to be run as part of the build process (typically to generate other source
- files). Currently, LLVM uses one main build tool
- called <a href="TableGenFundamentals.html">TableGen</a> to generate a
- variety of source files.</li>
-
- <li><em>Tools</em> - Command line applications which are built using the
- LLVM component libraries. Most LLVM tools are small and are primarily
- frontends to the library interfaces.</li>
-
-<!-- FIXME: We also need shared libraries as a first class component, but this
- is not yet implemented. -->
- </ul>
-
- <p>Components are described using <em>LLVMBuild.txt</em> files in the
- directories that define the component. See
- the <a href="#formatreference">Format Reference</a> section for information on
- the exact format of these files.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="formatreference">LLVMBuild Format Reference</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
- <p>LLVMBuild files are written in a simple variant of the INI or configuration
- file format (<a href="http://en.wikipedia.org/wiki/INI_file">Wikipedia
- entry</a>). The format defines a list of sections each of which may contain
- some number of properties. A simple example of the file format is below:</p>
- <div class="doc_code">
- <pre>
-<i>; Comments start with a semi-colon.</i>
-
-<i>; Sections are declared using square brackets.</i>
-[component_0]
-
-<i>; Properties are declared using '=' and are contained in the previous section.
-;
-; We support simple string and boolean scalar values and list values, where
-; items are separated by spaces. There is no support for quoting, and so
-; property values may not contain spaces.</i>
-property_name = property_value
-list_property_name = value_1 value_2 <em>...</em> value_n
-boolean_property_name = 1 <em>(or 0)</em>
-</pre>
- </div>
-
- <p>LLVMBuild files are expected to define a strict set of sections and
- properties. An typical component description file for a library
- component would look typically look like the following example:</p>
- <div class="doc_code">
- <pre>
-[component_0]
-type = Library
-name = Linker
-parent = Libraries
-required_libraries = Archive BitReader Core Support TransformUtils
-</pre>
- </div>
-
- <p>A full description of the exact sections and properties which are allowed
- follows.</p>
-
- <p>Each file may define exactly one common component, named "common". The
- common component may define the following properties:</p>
- <ul>
- <li><i>subdirectories</i> <b>[optional]</b>
- <p>If given, a list of the names of the subdirectories from the current
- subpath to search for additional LLVMBuild files.</p></li>
- </ul>
-
- <p>Each file may define multiple components. Each component is described by a
- section who name starts with "component". The remainder of the section name is
- ignored, but each section name must be unique. Typically components are just
- number in order for files with multiple components ("component_0",
- "component_1", and so on).<p>
-
- <p><b>Section names not matching this format (or the "common" section) are
- currently unused and are disallowed.</b></p>
-
- <p>Every component is defined by the properties in the section. The exact list
- of properties that are allowed depends on the component
- type. Components <b>may not</b> define any properties other than those
- expected by the component type.</p>
-
- <p>Every component must define the following properties:</p>
- <ul>
- <li><i>type</i> <b>[required]</b>
- <p>The type of the component. Supported component types are
- detailed below. Most components will define additional properties which
- may be required or optional.</p></li>
-
- <li><i>name</i> <b>[required]</b>
- <p>The name of the component. Names are required to be unique
- across the entire project.</p></li>
-
- <li><i>parent</i> <b>[required]</b>
- <p>The name of the logical parent of the component. Components are
- organized into a logical tree to make it easier to navigate and organize
- groups of components. The parents have no semantics as far as the project
- build is concerned, however. Typically, the parent will be the main
- component of the parent directory.</p>
-
- <!-- FIXME: Should we make the parent optional, and default to parent
- directories component? -->
-
- <p>Components may reference the root pseudo component using '$ROOT' to
- indicate they should logically be grouped at the top-level.</p>
- </li>
- </ul>
-
- <p>Components may define the following properties:</p>
- <ul>
- <li><i>dependencies</i> <b>[optional]</b>
- <p>If specified, a list of names of components which <i>must</i> be built
- prior to this one. This should only be exactly those components which
- produce some tool or source code required for building the
- component.</p>
-
- <p><em>NOTE:</em> Group and LibraryGroup components have no semantics for
- the actual build, and are not allowed to specify dependencies.</p></li>
- </ul>
-
- <p>The following section lists the available component types, as well as the
- properties which are associated with that component.</p>
-
- <ul>
- <li><i>type = Group</i>
- <p>Group components exist purely to allow additional arbitrary structuring
- of the logical components tree. For example, one might define a
- "Libraries" group to hold all of the root library components.</p>
-
- <p>Group components have no additionally properties.</p>
- </li>
-
- <li><i>type = Library</i>
- <p>Library components define an individual library which should be built
- from the source code in the component directory.</p>
-
- <p>Components with this type use the following properties:</p>
- <ul>
- <li><i>library_name</i> <b>[optional]</b>
- <p>If given, the name to use for the actual library file on disk. If
- not given, the name is derived from the component name
- itself.</p></li>
-
- <li><i>required_libraries</i> <b>[optional]</b>
- <p>If given, a list of the names of Library or LibraryGroup components
- which must also be linked in whenever this library is used. That is,
- the link time dependencies for this component. When tools are built,
- the build system will include the transitive closure of
- all <i>required_libraries</i> for the components the tool needs.</p></li>
-
- <li><i>add_to_library_groups</i> <b>[optional]</b>
- <p>If given, a list of the names of LibraryGroup components which this
- component is also part of. This allows nesting groups of
- components. For example, the <i>X86</i> target might define a library
- group for all of the <i>X86</i> components. That library group might
- then be included in the <i>all-targets</i> library group.</p></li>
-
- <li><i>installed</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this library is installed. Libraries that are not installed
- are only reported by <tt>llvm-config</tt> when it is run as part of a
- development directory.</p></li>
- </ul>
- </li>
-
- <li><i>type = LibraryGroup</i>
- <p>LibraryGroup components are a mechanism to allow easy definition of
- useful sets of related components. In particular, we use them to easily
- specify things like "all targets", or "all assembly printers".</p>
-
- <p>Components with this type use the following properties:</p>
- <ul>
- <li><i>required_libraries</i> <b>[optional]</b>
- <p>See the Library type for a description of this property.</p></li>
-
- <li><i>add_to_library_groups</i> <b>[optional]</b>
- <p>See the Library type for a description of this property.</p></li>
- </ul>
- </li>
-
- <li><i>type = TargetGroup</i>
- <p>TargetGroup components are an extension of LibraryGroups, specifically
- for defining LLVM targets (which are handled specially in a few
- places).</p>
-
- <p>The name of the component should always be the name of the target.</p>
-
- <p>Components with this type use the LibraryGroup properties in addition
- to:</p>
- <ul>
- <li><i>has_asmparser</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target defines an assembly parser.</p></li>
- <li><i>has_asmprinter</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target defines an assembly printer.</p></li>
- <li><i>has_disassembler</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target defines a disassembler.</p></li>
- <li><i>has_jit</i> <b>[optional]</b> <b>[boolean]</b>
- <p>Whether this target supports JIT compilation.</p></li>
- </ul>
- </li>
-
- <li><i>type = Tool</i>
- <p>Tool components define standalone command line tools which should be
- built from the source code in the component directory and linked.</p>
-
- <p>Components with this type use the following properties:</p>
- <ul>
- <li><i>required_libraries</i> <b>[optional]</b>
-
- <p>If given, a list of the names of Library or LibraryGroup components
- which this tool is required to be linked with. <b>NOTE:</b> The values
- should be the component names, which may not always match up with the
- actual library names on disk.</p>
-
- <p>Build systems are expected to properly include all of the libraries
- required by the linked components (i.e., the transitive closer
- of <em>required_libraries</em>).</p>
-
- <p>Build systems are also expected to understand that those library
- components must be built prior to linking -- they do not also need to
- be listed under <i>dependencies</i>.</p></li>
- </ul>
- </li>
-
- <li><i>type = BuildTool</i>
- <p>BuildTool components are like Tool components, except that the tool is
- supposed to be built for the platform where the build is running (instead
- of that platform being targetted). Build systems are expected to handle
- the fact that required libraries may need to be built for multiple
- platforms in order to be able to link this tool.</p>
-
- <p>BuildTool components currently use the exact same properties as Tool
- components, the type distinction is only used to differentiate what the
- tool is built for.</p>
- </li>
- </ul>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst
new file mode 100644
index 0000000000..d9215dd8eb
--- /dev/null
+++ b/docs/LLVMBuild.rst
@@ -0,0 +1,325 @@
+===============
+LLVMBuild Guide
+===============
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document describes the ``LLVMBuild`` organization and files which
+we use to describe parts of the LLVM ecosystem. For description of
+specific LLVMBuild related tools, please see the command guide.
+
+LLVM is designed to be a modular set of libraries which can be flexibly
+mixed together in order to build a variety of tools, like compilers,
+JITs, custom code generators, optimization passes, interpreters, and so
+on. Related projects in the LLVM system like Clang and LLDB also tend to
+follow this philosophy.
+
+In order to support this usage style, LLVM has a fairly strict structure
+as to how the source code and various components are organized. The
+``LLVMBuild.txt`` files are the explicit specification of that
+structure, and are used by the build systems and other tools in order to
+develop the LLVM project.
+
+Project Organization
+====================
+
+The source code for LLVM projects using the LLVMBuild system (LLVM,
+Clang, and LLDB) is organized into *components*, which define the
+separate pieces of functionality that make up the project. These
+projects may consist of many libraries, associated tools, build tools,
+or other utility tools (for example, testing tools).
+
+For the most part, the project contents are organized around defining
+one main component per each subdirectory. Each such directory contains
+an ``LLVMBuild.txt`` which contains the component definitions.
+
+The component descriptions for the project as a whole are automatically
+gathered by the LLVMBuild tools. The tools automatically traverse the
+source directory structure to find all of the component description
+files. NOTE: For performance/sanity reasons, we only traverse into
+subdirectories when the parent itself contains an ``LLVMBuild.txt``
+description file.
+
+Build Integration
+=================
+
+The LLVMBuild files themselves are just a declarative way to describe
+the project structure. The actual building of the LLVM project is
+handled by another build system (currently we support both
+:doc:`Makefiles <MakefileGuide>` and :doc:`CMake <CMake>`).
+
+The build system implementation will load the relevant contents of the
+LLVMBuild files and use that to drive the actual project build.
+Typically, the build system will only need to load this information at
+"configure" time, and use it to generate native information. Build
+systems will also handle automatically reconfiguring their information
+when the contents of the ``LLVMBuild.txt`` files change.
+
+Developers generally are not expected to need to be aware of the details
+of how the LLVMBuild system is integrated into their build. Ideally,
+LLVM developers who are not working on the build system would only ever
+need to modify the contents of the ``LLVMBuild.txt`` description files
+(although we have not reached this goal yet).
+
+For more information on the utility tool we provide to help interfacing
+with the build system, please see the :doc:`llvm-build
+<CommandGuide/llvm-build>` documentation.
+
+Component Overview
+==================
+
+As mentioned earlier, LLVM projects are organized into logical
+*components*. Every component is typically grouped into its own
+subdirectory. Generally, a component is organized around a coherent
+group of sources which have some kind of clear API separation from other
+parts of the code.
+
+LLVM primarily uses the following types of components:
+
+- *Libraries* - Library components define a distinct API which can be
+ independently linked into LLVM client applications. Libraries typically
+ have private and public header files, and may specify a list of required
+ libraries that they build on top of.
+- *Build Tools* - Build tools are applications which are designed to be run
+ as part of the build process (typically to generate other source files).
+ Currently, LLVM uses one main build tool called :doc:`TableGen
+ <TableGenFundamentals>` to generate a variety of source files.
+- *Tools* - Command line applications which are built using the LLVM
+ component libraries. Most LLVM tools are small and are primarily
+ frontends to the library interfaces.
+
+Components are described using ``LLVMBuild.txt`` files in the directories
+that define the component. See the `LLVMBuild Format Reference`_ section
+for information on the exact format of these files.
+
+LLVMBuild Format Reference
+==========================
+
+LLVMBuild files are written in a simple variant of the INI or configuration
+file format (`Wikipedia entry`_). The format defines a list of sections
+each of which may contain some number of properties. A simple example of
+the file format is below:
+
+.. _Wikipedia entry: http://en.wikipedia.org/wiki/INI_file
+
+.. code-block:: ini
+
+ ; Comments start with a semi-colon.
+
+ ; Sections are declared using square brackets.
+ [component_0]
+
+ ; Properties are declared using '=' and are contained in the previous section.
+ ;
+ ; We support simple string and boolean scalar values and list values, where
+ ; items are separated by spaces. There is no support for quoting, and so
+ ; property values may not contain spaces.
+ property_name = property_value
+ list_property_name = value_1 value_2 ... value_n
+ boolean_property_name = 1 (or 0)
+
+LLVMBuild files are expected to define a strict set of sections and
+properties. A typical component description file for a library
+component would typically look like the following example:
+
+.. code-block:: ini
+
+ [component_0]
+ type = Library
+ name = Linker
+ parent = Libraries
+ required_libraries = Archive BitReader Core Support TransformUtils
+
+A full description of the exact sections and properties which are
+allowed follows.
+
+Each file may define exactly one common component, named ``common``. The
+common component may define the following properties:
+
+- ``subdirectories`` **[optional]**
+
+ If given, a list of the names of the subdirectories from the current
+ subpath to search for additional LLVMBuild files.
+
+Each file may define multiple components. Each component is described by a
+section whose name starts with ``component``. The remainder of the section
+name is ignored, but each section name must be unique. Typically components
+are just numbered in order for files with multiple components
+(``component_0``, ``component_1``, and so on).
+
+.. warning::
+
+ Section names not matching this format (or the ``common`` section) are
+ currently unused and are disallowed.
+
+Every component is defined by the properties in the section. The exact
+list of properties that are allowed depends on the component type.
+Components **may not** define any properties other than those expected
+by the component type.
+
+Every component must define the following properties:
+
+- ``type`` **[required]**
+
+ The type of the component. Supported component types are detailed
+ below. Most components will define additional properties which may be
+ required or optional.
+
+- ``name`` **[required]**
+
+ The name of the component. Names are required to be unique across the
+ entire project.
+
+- ``parent`` **[required]**
+
+ The name of the logical parent of the component. Components are
+ organized into a logical tree to make it easier to navigate and
+ organize groups of components. The parents have no semantics as far
+ as the project build is concerned, however. Typically, the parent
+ will be the main component of the parent directory.
+
+ Components may reference the root pseudo component using ``$ROOT`` to
+ indicate they should logically be grouped at the top-level.
+
+Components may define the following properties:
+
+- ``dependencies`` **[optional]**
+
+ If specified, a list of names of components which *must* be built
+ prior to this one. This should only be exactly those components which
+ produce some tool or source code required for building the component.
+
+ .. note::
+
+ ``Group`` and ``LibraryGroup`` components have no semantics for the
+ actual build, and are not allowed to specify dependencies.
+
+The following section lists the available component types, as well as
+the properties which are associated with that component.
+
+- ``type = Group``
+
+ Group components exist purely to allow additional arbitrary structuring
+ of the logical components tree. For example, one might define a
+ ``Libraries`` group to hold all of the root library components.
+
+ ``Group`` components have no additional properties.
+
+- ``type = Library``
+
+ Library components define an individual library which should be built
+ from the source code in the component directory.
+
+ Components with this type use the following properties:
+
+ - ``library_name`` **[optional]**
+
+ If given, the name to use for the actual library file on disk. If
+ not given, the name is derived from the component name itself.
+
+ - ``required_libraries`` **[optional]**
+
+ If given, a list of the names of ``Library`` or ``LibraryGroup``
+ components which must also be linked in whenever this library is
+ used. That is, the link time dependencies for this component. When
+ tools are built, the build system will include the transitive closure
+ of all ``required_libraries`` for the components the tool needs.
+
+ - ``add_to_library_groups`` **[optional]**
+
+ If given, a list of the names of ``LibraryGroup`` components which
+ this component is also part of. This allows nesting groups of
+ components. For example, the ``X86`` target might define a library
+ group for all of the ``X86`` components. That library group might
+ then be included in the ``all-targets`` library group.
+
+ - ``installed`` **[optional]** **[boolean]**
+
+ Whether this library is installed. Libraries that are not installed
+ are only reported by ``llvm-config`` when it is run as part of a
+ development directory.
+
+- ``type = LibraryGroup``
+
+ ``LibraryGroup`` components are a mechanism to allow easy definition of
+ useful sets of related components. In particular, we use them to easily
+ specify things like "all targets", or "all assembly printers".
+
+ Components with this type use the following properties:
+
+ - ``required_libraries`` **[optional]**
+
+ See the ``Library`` type for a description of this property.
+
+ - ``add_to_library_groups`` **[optional]**
+
+ See the ``Library`` type for a description of this property.
+
+- ``type = TargetGroup``
+
+ ``TargetGroup`` components are an extension of ``LibraryGroup``\s,
+ specifically for defining LLVM targets (which are handled specially in a
+ few places).
+
+ The name of the component should always be the name of the target.
+
+ Components with this type use the ``LibraryGroup`` properties in
+ addition to:
+
+ - ``has_asmparser`` **[optional]** **[boolean]**
+
+ Whether this target defines an assembly parser.
+
+ - ``has_asmprinter`` **[optional]** **[boolean]**
+
+ Whether this target defines an assembly printer.
+
+ - ``has_disassembler`` **[optional]** **[boolean]**
+
+ Whether this target defines a disassembler.
+
+ - ``has_jit`` **[optional]** **[boolean]**
+
+ Whether this target supports JIT compilation.
+
+- ``type = Tool``
+
+ ``Tool`` components define standalone command line tools which should be
+ built from the source code in the component directory and linked.
+
+ Components with this type use the following properties:
+
+ - ``required_libraries`` **[optional]**
+
+ If given, a list of the names of ``Library`` or ``LibraryGroup``
+ components which this tool is required to be linked with.
+
+ .. note::
+
+ The values should be the component names, which may not always
+ match up with the actual library names on disk.
+
+ Build systems are expected to properly include all of the libraries
+ required by the linked components (i.e., the transitive closure of
+ ``required_libraries``).
+
+ Build systems are also expected to understand that those library
+ components must be built prior to linking -- they do not also need
+ to be listed under ``dependencies``.
+
+- ``type = BuildTool``
+
+ ``BuildTool`` components are like ``Tool`` components, except that the
+ tool is supposed to be built for the platform where the build is running
+ (instead of that platform being targeted). Build systems are expected
+ to handle the fact that required libraries may need to be built for
+ multiple platforms in order to be able to link this tool.
+
+ ``BuildTool`` components currently use the exact same properties as
+ ``Tool`` components; the type distinction is only used to differentiate
+ what the tool is built for.
+
diff --git a/docs/LangRef.html b/docs/LangRef.html
index ed47f1f00e..35ad94d990 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -256,10 +256,17 @@
<li><a href="#int_cos">'<tt>llvm.cos.*</tt>' Intrinsic</a></li>
<li><a href="#int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a></li>
<li><a href="#int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_exp2">'<tt>llvm.exp2.*</tt>' Intrinsic</a></li>
<li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_log10">'<tt>llvm.log10.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_log2">'<tt>llvm.log2.*</tt>' Intrinsic</a></li>
<li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li>
<li><a href="#int_fabs">'<tt>llvm.fabs.*</tt>' Intrinsic</a></li>
<li><a href="#int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_ceil">'<tt>llvm.ceil.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_trunc">'<tt>llvm.trunc.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_rint">'<tt>llvm.rint.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_nearbyint">'<tt>llvm.nearbyint.*</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_manip">Bit Manipulation Intrinsics</a>
@@ -535,7 +542,7 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
<p>This example is made up of a <a href="#globalvars">global variable</a> named
"<tt>.str</tt>", an external declaration of the "<tt>puts</tt>" function,
a <a href="#functionstructure">function definition</a> for
- "<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a>
+ "<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a>
"<tt>foo</tt>".</p>
<p>In general, a module is made up of a list of global values (where both
@@ -722,10 +729,10 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
target to use whatever tricks it wants to produce fast code for the
target, without having to conform to an externally specified ABI
(Application Binary Interface).
- <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
- when this or the GHC convention is used.</a> This calling convention
- does not support varargs and requires the prototype of all callees to
- exactly match the prototype of the function definition.</dd>
+ <a href="CodeGenerator.html#id80">Tail calls can only be optimized
+ when this, the GHC or the HiPE convention is used.</a> This calling
+ convention does not support varargs and requires the prototype of all
+ callees to exactly match the prototype of the function definition.</dd>
<dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
<dd>This calling convention attempts to make code in the caller as efficient
@@ -742,7 +749,7 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
disabling callee save registers. This calling convention should not be
used lightly but only for specific situations such as an alternative to
the <em>register pinning</em> performance technique often used when
- implementing functional programming languages.At the moment only X86
+ implementing functional programming languages. At the moment only X86
supports this convention and it has the following limitations:
<ul>
<li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
@@ -751,10 +758,25 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
6 floating point parameters.</li>
</ul>
This calling convention supports
- <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
+ <a href="CodeGenerator.html#id80">tail call optimization</a> but
requires both the caller and callee are using it.
</dd>
+ <dt><b>"<tt>cc <em>11</em></tt>" - The HiPE calling convention</b>:</dt>
+ <dd>This calling convention has been implemented specifically for use by the
+ <a href="http://www.it.uu.se/research/group/hipe/">High-Performance Erlang
+ (HiPE)</a> compiler, <em>the</em> native code compiler of
+ <a href="http://www.erlang.org/download.shtml">Ericsson's Open Source
+ Erlang/OTP system</a>. It uses more registers for argument passing than
+ the ordinary C calling convention and defines no callee-saved registers.
+ The calling convention properly supports
+ <a href="CodeGenerator.html#id80">tail call optimization</a> but requires
+ that both the caller and the callee use it. It uses a <em>register
+ pinning</em> mechanism, similar to GHC's convention, for keeping
+ frequently accessed runtime components pinned to specific hardware
+ registers. At the moment only X86 supports this convention (both 32 and 64
+ bit).</dd>
+
<dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
<dd>Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific calling
@@ -1096,7 +1118,7 @@ declare signext i8 @returns_signed_char()
<tt><a href="#readonly">readonly</a></tt> functions should not write to
<tt>byval</tt> parameters). This is not a valid attribute for return
values.</p>
-
+
<p>The byval attribute also supports specifying an alignment with
the align attribute. It indicates the alignment of the stack slot to
form and the known alignment of the pointer specified to the call site. If
@@ -1175,7 +1197,7 @@ define void @f() gc "name" { ... }
<p>Function attributes are set to communicate additional information about a
function. Function attributes are considered to be part of the function, not
- of the function type, so functions with different parameter attributes can
+ of the function type, so functions with different function attributes can
have the same function type.</p>
<p>Function attributes are simple keywords that follow the type specified. If
@@ -1454,12 +1476,12 @@ target datalayout = "<i>layout specification</i>"
this is not a specification from the frontend of what alignment the code
generator should use.</p>
-<p>Instead, if specified, the target data layout is required to match what the
- ultimate <em>code generator</em> expects. This string is used by the
+<p>Instead, if specified, the target data layout is required to match what the
+ ultimate <em>code generator</em> expects. This string is used by the
mid-level optimizers to
- improve code, and this only works if it matches what the ultimate code
+ improve code, and this only works if it matches what the ultimate code
generator uses. If you would like to generate IR that does not embed this
- target-specific detail into the IR, then you don't have to specify the
+ target-specific detail into the IR, then you don't have to specify the
string. This will disable some optimizations that require precise layout
information, but this also prevents those optimizations from introducing
target specificity into the IR.</p>
@@ -1607,7 +1629,7 @@ any write to the same byte, except:</p>
addresses which do not behave like normal memory. It does not generally
provide cross-thread synchronization.)
<li>Otherwise, if there is no write to the same byte that happens before
- <var>R<sub>byte</sub></var>, <var>R<sub>byte</sub></var> returns
+ <var>R<sub>byte</sub></var>, <var>R<sub>byte</sub></var> returns
<tt>undef</tt> for that byte.
<li>Otherwise, if <var>R<sub>byte</sub></var> may see exactly one write,
<var>R<sub>byte</sub></var> returns the value written by that
@@ -2106,8 +2128,8 @@ in signal handlers).</p>
Structures in registers are accessed using the
'<tt><a href="#i_extractvalue">extractvalue</a></tt>' and
'<tt><a href="#i_insertvalue">insertvalue</a></tt>' instructions.</p>
-
-<p>Structures may optionally be "packed" structures, which indicate that the
+
+<p>Structures may optionally be "packed" structures, which indicate that the
alignment of the struct is one byte, and that there is no padding between
the elements. In non-packed structs, padding between field types is inserted
as defined by the DataLayout string in the module, which is required to match
@@ -2120,13 +2142,13 @@ in signal handlers).</p>
no way to write one. Identified types can be recursive, can be opaqued, and are
never uniqued.
</p>
-
+
<h5>Syntax:</h5>
<pre>
%T1 = type { &lt;type list&gt; } <i>; Identified normal struct type</i>
%T2 = type &lt;{ &lt;type list&gt; }&gt; <i>; Identified packed struct type</i>
</pre>
-
+
<h5>Examples:</h5>
<table class="layout">
<tr class="layout">
@@ -2147,7 +2169,7 @@ in signal handlers).</p>
</table>
</div>
-
+
<!-- _______________________________________________________________________ -->
<h4>
<a name="t_opaque">Opaque Structure Types</a>
@@ -2188,7 +2210,7 @@ in signal handlers).</p>
<h5>Overview:</h5>
<p>The pointer type is used to specify memory locations.
Pointers are commonly used to reference objects in memory.</p>
-
+
<p>Pointer types may have an optional address space attribute defining the
numbered address space where the pointed-to object resides. The default
address space is number zero. The semantics of non-zero address
@@ -2980,7 +3002,7 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
</pre>
</div>
-<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
+<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
metadata nodes, which can be looked up in the module symbol table. For
example:</p>
@@ -2990,7 +3012,7 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
</pre>
</div>
-<p>Metadata can be used as function arguments. Here <tt>llvm.dbg.value</tt>
+<p>Metadata can be used as function arguments. Here <tt>llvm.dbg.value</tt>
function is using two metadata arguments:</p>
<div class="doc_code">
@@ -3099,7 +3121,7 @@ are padding and what the TBAA tags of the struct are.</p>
<h4>
<a name="fpmath">'<tt>fpmath</tt>' Metadata</a>
</h4>
-
+
<div>
<p><tt>fpmath</tt> metadata may be attached to any instruction of floating point
@@ -3515,13 +3537,13 @@ cast formed of bitcast or getelementptr. For example, a legal use of it is:</p>
control flow, not values (the one exception being the
'<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
-<p>The terminator instructions are:
- '<a href="#i_ret"><tt>ret</tt></a>',
+<p>The terminator instructions are:
+ '<a href="#i_ret"><tt>ret</tt></a>',
'<a href="#i_br"><tt>br</tt></a>',
- '<a href="#i_switch"><tt>switch</tt></a>',
+ '<a href="#i_switch"><tt>switch</tt></a>',
'<a href="#i_indirectbr"><tt>indirectbr</tt></a>',
- '<a href="#i_invoke"><tt>invoke</tt></a>',
- '<a href="#i_resume"><tt>resume</tt></a>', and
+ '<a href="#i_invoke"><tt>invoke</tt></a>',
+ '<a href="#i_resume"><tt>resume</tt></a>', and
'<a href="#i_unreachable"><tt>unreachable</tt></a>'.</p>
<!-- _______________________________________________________________________ -->
@@ -3820,7 +3842,7 @@ IfUnequal:
</div>
<!-- _______________________________________________________________________ -->
-
+
<h4>
<a name="i_resume">'<tt>resume</tt>' Instruction</a>
</h4>
@@ -4419,7 +4441,7 @@ IfUnequal:
vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
shift amount in <tt>op2</tt>.</p>
-<p>If the <tt>nuw</tt> keyword is present, then the shift produces a
+<p>If the <tt>nuw</tt> keyword is present, then the shift produces a
<a href="#poisonvalues">poison value</a> if it shifts out any non-zero bits. If
the <tt>nsw</tt> keyword is present, then the shift produces a
<a href="#poisonvalues">poison value</a> if it shifts out any bits that disagree
@@ -5052,7 +5074,7 @@ IfUnequal:
href="#memorymodel">defined</a> results when they may see multiple atomic
stores. The type of the pointee must be an integer type whose bit width
is a power of two greater than or equal to eight and less than or equal
- to a target-specific size limit. <code>align</code> must be explicitly
+ to a target-specific size limit. <code>align</code> must be explicitly
specified on atomic loads, and the load has undefined behavior if the
alignment is not set to a value which is at least the size in bytes of
the pointee. <code>!nontemporal</code> does not have any defined semantics
@@ -5133,7 +5155,7 @@ IfUnequal:
href="#memorymodel">defined</a> results when they may see multiple atomic
stores. The type of the pointee must be an integer type whose bit width
is a power of two greater than or equal to eight and less than or equal
- to a target-specific size limit. <code>align</code> must be explicitly
+ to a target-specific size limit. <code>align</code> must be explicitly
specified on atomic stores, and the store has undefined behavior if the
alignment is not set to a value which is at least the size in bytes of
the pointee. <code>!nontemporal</code> does not have any defined semantics
@@ -5395,7 +5417,7 @@ specified by the <var>operation</var> argument:</p>
<pre>
&lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
&lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
- &lt;result&gt; = getelementptr &lt;ptr vector&gt; ptrval, &lt;vector index type&gt; idx
+ &lt;result&gt; = getelementptr &lt;ptr vector&gt; ptrval, &lt;vector index type&gt; idx
</pre>
<h5>Overview:</h5>
@@ -5419,9 +5441,11 @@ specified by the <var>operation</var> argument:</p>
<p>The type of each index argument depends on the type it is indexing into.
When indexing into a (optionally packed) structure, only <tt>i32</tt>
- integer <b>constants</b> are allowed. When indexing into an array, pointer
- or vector, integers of any width are allowed, and they are not required to be
- constant. These integers are treated as signed values where relevant.</p>
+ integer <b>constants</b> are allowed (when using a vector of indices they
+ must all be the <b>same</b> <tt>i32</tt> integer constant). When indexing
+ into an array, pointer or vector, integers of any width are allowed, and
+ they are not required to be constant. These integers are treated as signed
+ values where relevant.</p>
<p>For example, let's consider a C code fragment and how it gets compiled to
LLVM:</p>
@@ -5520,9 +5544,8 @@ define i32* @foo(%struct.ST* %s) {
%iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
</pre>
-<p>In cases where the pointer argument is a vector of pointers, only a
- single index may be used, and the number of vector elements has to be
- the same. For example: </p>
+<p>In cases where the pointer argument is a vector of pointers, each index must
+ be a vector with the same number of elements. For example: </p>
<pre class="doc_code">
%A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets,
</pre>
@@ -7500,6 +7523,39 @@ LLVM</a>.</p>
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="int_exp2">'<tt>llvm.exp2.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.exp2</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.exp2.f32(float %Val)
+ declare double @llvm.exp2.f64(double %Val)
+ declare x86_fp80 @llvm.exp2.f80(x86_fp80 %Val)
+ declare fp128 @llvm.exp2.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.exp2.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.exp2.*</tt>' intrinsics perform the exp2 function.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>exp2</tt> functions
+ would, and handles error conditions in the same way.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="int_log">'<tt>llvm.log.*</tt>' Intrinsic</a>
</h4>
@@ -7533,6 +7589,72 @@ LLVM</a>.</p>
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="int_log10">'<tt>llvm.log10.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.log10</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.log10.f32(float %Val)
+ declare double @llvm.log10.f64(double %Val)
+ declare x86_fp80 @llvm.log10.f80(x86_fp80 %Val)
+ declare fp128 @llvm.log10.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.log10.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.log10.*</tt>' intrinsics perform the log10 function.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>log10</tt> functions
+ would, and handles error conditions in the same way.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_log2">'<tt>llvm.log2.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.log2</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.log2.f32(float %Val)
+ declare double @llvm.log2.f64(double %Val)
+ declare x86_fp80 @llvm.log2.f80(x86_fp80 %Val)
+ declare fp128 @llvm.log2.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.log2.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.log2.*</tt>' intrinsics perform the log2 function.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>log2</tt> functions
+ would, and handles error conditions in the same way.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a>
</h4>
@@ -7633,6 +7755,143 @@ LLVM</a>.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_ceil">'<tt>llvm.ceil.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.ceil</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.ceil.f32(float %Val)
+ declare double @llvm.ceil.f64(double %Val)
+ declare x86_fp80 @llvm.ceil.f80(x86_fp80 %Val)
+ declare fp128 @llvm.ceil.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.ceil.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.ceil.*</tt>' intrinsics return the ceiling of
+ the operand.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>ceil</tt> functions
+ would, and handles error conditions in the same way.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_trunc">'<tt>llvm.trunc.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.trunc</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.trunc.f32(float %Val)
+ declare double @llvm.trunc.f64(double %Val)
+ declare x86_fp80 @llvm.trunc.f80(x86_fp80 %Val)
+ declare fp128 @llvm.trunc.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.trunc.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.trunc.*</tt>' intrinsics return the operand rounded to the
+ nearest integer not larger in magnitude than the operand.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>trunc</tt> functions
+ would, and handles error conditions in the same way.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_rint">'<tt>llvm.rint.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.rint</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.rint.f32(float %Val)
+ declare double @llvm.rint.f64(double %Val)
+ declare x86_fp80 @llvm.rint.f80(x86_fp80 %Val)
+ declare fp128 @llvm.rint.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.rint.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.rint.*</tt>' intrinsics return the operand rounded to the
+ nearest integer. It may raise an inexact floating-point exception if the
+ operand isn't an integer.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>rint</tt> functions
+ would, and handles error conditions in the same way.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_nearbyint">'<tt>llvm.nearbyint.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.nearbyint</tt> on any
+ floating point or vector of floating point type. Not all targets support all
+ types however.</p>
+
+<pre>
+ declare float @llvm.nearbyint.f32(float %Val)
+ declare double @llvm.nearbyint.f64(double %Val)
+ declare x86_fp80 @llvm.nearbyint.f80(x86_fp80 %Val)
+ declare fp128 @llvm.nearbyint.f128(fp128 %Val)
+ declare ppc_fp128 @llvm.nearbyint.ppcf128(ppc_fp128 %Val)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.nearbyint.*</tt>' intrinsics return the operand rounded to the
+ nearest integer.</p>
+
+<h5>Arguments:</h5>
+<p>The argument and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>nearbyint</tt>
+ functions would, and handles error conditions in the same way.</p>
+
+</div>
+
</div>
<!-- ======================================================================= -->
@@ -8164,7 +8423,7 @@ intrinsic function should be used instead.</p>
format. This means that it is
a dense encoding (in memory) but does not support computation in the
format.</p>
-
+
<p>This means that code must first load the half-precision floating point
value as an i16, then convert it to float with <a
href="#int_convert_from_fp16"><tt>llvm.convert.from.fp16</tt></a>.
@@ -8698,7 +8957,7 @@ intrinsic function should be used instead.</p>
is a boolean and determines whether <tt>llvm.objectsize</tt> returns 0 (if
true) or -1 (if false) when the object size is unknown.
The second argument only accepts constants.</p>
-
+
<h5>Semantics:</h5>
<p>The <tt>llvm.objectsize</tt> intrinsic is lowered to a constant representing
the size of the object concerned. If the size cannot be determined at compile
diff --git a/docs/MakefileGuide.rst b/docs/MakefileGuide.rst
index d2bdd24a9e..2c1d33e962 100644
--- a/docs/MakefileGuide.rst
+++ b/docs/MakefileGuide.rst
@@ -339,7 +339,7 @@ the invocation of ``make check-local`` in the ``test`` directory. The intended
usage for this is to assist in running specific suites of tests. If
``TESTSUITE`` is not set, the implementation of ``check-local`` should run all
normal tests. It is up to the project to define what different values for
-``TESTSUTE`` will do. See the `Testing Guide <TestingGuide.html>`_ for further
+``TESTSUITE`` will do. See the :doc:`Testing Guide <TestingGuide>` for further
details.
``check-local``
diff --git a/docs/Passes.html b/docs/Passes.html
index aa9f8bc247..7bffc54d8d 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -175,7 +175,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#simplify-libcalls">-simplify-libcalls</a></td><td>Simplify well-known library calls</td></tr>
<tr><td><a href="#simplifycfg">-simplifycfg</a></td><td>Simplify the CFG</td></tr>
<tr><td><a href="#sink">-sink</a></td><td>Code sinking</td></tr>
-<tr><td><a href="#sretpromotion">-sretpromotion</a></td><td>Promote sret arguments to multiple ret values</td></tr>
<tr><td><a href="#strip">-strip</a></td><td>Strip all symbols from a module</td></tr>
<tr><td><a href="#strip-dead-debug-info">-strip-dead-debug-info</a></td><td>Strip debug info for unused symbols</td></tr>
<tr><td><a href="#strip-dead-prototypes">-strip-dead-prototypes</a></td><td>Strip Unused Function Prototypes</td></tr>
@@ -1715,29 +1714,6 @@ if (X &lt; 3) {</pre>
<!-------------------------------------------------------------------------- -->
<h3>
- <a name="sretpromotion">-sretpromotion: Promote sret arguments to multiple ret values</a>
-</h3>
-<div>
- <p>
- This pass finds functions that return a struct (using a pointer to the struct
- as the first argument of the function, marked with the '<tt>sret</tt>' attribute) and
- replaces them with a new function that simply returns each of the elements of
- that struct (using multiple return values).
- </p>
-
- <p>
- This pass works under a number of conditions:
- </p>
-
- <ul>
- <li>The returned struct must not contain other structs</li>
- <li>The returned struct must only be used to load values from</li>
- <li>The placeholder struct passed in is the result of an <tt>alloca</tt></li>
- </ul>
-</div>
-
-<!-------------------------------------------------------------------------- -->
-<h3>
<a name="strip">-strip: Strip all symbols from a module</a>
</h3>
<div>
diff --git a/docs/Projects.rst b/docs/Projects.rst
index 63132887a5..c5d03d33a0 100644
--- a/docs/Projects.rst
+++ b/docs/Projects.rst
@@ -156,9 +156,9 @@ Underneath your top level directory, you should have the following directories:
* LLVM provides a ``tcl`` procedure that is used by ``Dejagnu`` to run tests.
It can be found in ``llvm/lib/llvm-dg.exp``. This test procedure uses ``RUN``
lines in the actual test case to determine how to run the test. See the
- `TestingGuide <TestingGuide.html>`_ for more details. You can easily write
- Makefile support similar to the Makefiles in ``llvm/test`` to use ``Dejagnu``
- to run your project's tests.
+ :doc:`TestingGuide` for more details. You can easily write Makefile
+ support similar to the Makefiles in ``llvm/test`` to use ``Dejagnu`` to
+ run your project's tests.
* LLVM contains an optional package called ``llvm-test``, which provides
benchmarks and programs that are known to compile with the Clang front
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index fc3a8b71bd..31cb26ca02 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -226,17 +226,26 @@ Release Notes</a>.</h1>
<div>
<p><a href="http://polly.llvm.org/">Polly</a> is an <em>experimental</em>
- optimizer for data locality and parallelism. It currently provides high-level
- loop optimizations and automatic parallelisation (using the OpenMP run time).
- Work in the area of automatic SIMD and accelerator code generation was
- started.</p>
+ optimizer for data locality and parallelism. It provides high-level
+ loop optimizations and automatic parallelisation.</p>
<p>Within the LLVM 3.2 time-frame there were the following highlights:</p>
<ul>
- <li>...</li>
+ <li>isl, the integer set library used by Polly, was relicensed to the MIT
+license</li>
+ <li>isl based code generation<br />
+ <ul>
+<li>MIT licensed replacement for CLooG (LGPLv2) </li>
+<li>Fine grained option handling (separation of
+core and border computations, control overhead vs. code size) </li>
+</ul>
+</li>
+<li>Support for FORTRAN and dragonegg</li>
+<li>OpenMP code generation fixes</li>
</ul>
+
</div>
</div>
@@ -489,7 +498,7 @@ Release Notes</a>.</h1>
<ul>
<li>The inner most loops must have a single basic block.</li>
<li>The number of iterations are known before the loop starts to execute.</li>
- <li>The loop counter needs to be incrimented by one.</li>
+ <li>The loop counter needs to be incremented by one.</li>
<li>The loop trip count <b>can</b> be a variable.</li>
<li>Loops do <b>not</b> need to start at zero.</li>
<li>The induction variable can be used inside the loop.</li>
@@ -647,6 +656,46 @@ Release Notes</a>.</h1>
<!--=========================================================================-->
<h3>
+<a name="PowerPC">PowerPC Target Improvements</a>
+</h3>
+
+<div>
+
+<ul>
+<p>Many fixes and changes across LLVM (and Clang) for better compliance with
+ the 64-bit PowerPC ELF Application Binary Interface, interoperability with
+ GCC, and overall 64-bit PowerPC support. Some highlights include:</p>
+<ul>
+ <li> MCJIT support added.</li>
+ <li> PPC64 relocation support and (small code model) TOC handling
+ added.</li>
+ <li> Parameter passing and return value fixes (alignment issues,
+ padding, varargs support, proper register usage, odd-sized
+ structure support, float support, extension of return values
+ for i32 return values).</li>
+ <li> Fixes in spill and reload code for vector registers.</li>
+ <li> C++ exception handling enabled.</li>
+ <li> Changes to remediate double-rounding compatibility issues with
+ respect to GCC behavior.</li>
+ <li> Refactoring to disentangle ppc64-elf-linux ABI from Darwin
+ ppc64 ABI support.</li>
+ <li> Assorted new test cases and test case fixes (endian and word
+ size issues).</li>
+ <li> Fixes for big-endian codegen bugs, instruction encodings, and
+ instruction constraints.</li>
+ <li> Implemented -integrated-as support.</li>
+ <li> Additional support for Altivec compare operations.</li>
+ <li> IBM long double support.</li>
+</ul>
+<p>There have also been code generation improvements for both 32- and 64-bit
+ code. Instruction scheduling support for the Freescale e500mc and e5500
+ cores has been added.</p>
+</ul>
+
+</div>
+
+<!--=========================================================================-->
+<h3>
<a name="OtherTS">Other Target Specific Improvements</a>
</h3>
@@ -670,6 +719,8 @@ Release Notes</a>.</h1>
from the previous release.</p>
<ul>
+ <li>The CellSPU port has been removed. It can still be found in older
+ versions.</li>
<li>...</li>
</ul>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
deleted file mode 100644
index 546aab9d1a..0000000000
--- a/docs/SourceLevelDebugging.html
+++ /dev/null
@@ -1,2858 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>Source Level Debugging with LLVM</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>Source Level Debugging with LLVM</h1>
-
-<table class="layout" style="width:100%">
- <tr class="layout">
- <td class="left">
-<ul>
- <li><a href="#introduction">Introduction</a>
- <ol>
- <li><a href="#phil">Philosophy behind LLVM debugging information</a></li>
- <li><a href="#consumers">Debug information consumers</a></li>
- <li><a href="#debugopt">Debugging optimized code</a></li>
- </ol></li>
- <li><a href="#format">Debugging information format</a>
- <ol>
- <li><a href="#debug_info_descriptors">Debug information descriptors</a>
- <ul>
- <li><a href="#format_compile_units">Compile unit descriptors</a></li>
- <li><a href="#format_files">File descriptors</a></li>
- <li><a href="#format_global_variables">Global variable descriptors</a></li>
- <li><a href="#format_subprograms">Subprogram descriptors</a></li>
- <li><a href="#format_blocks">Block descriptors</a></li>
- <li><a href="#format_basic_type">Basic type descriptors</a></li>
- <li><a href="#format_derived_type">Derived type descriptors</a></li>
- <li><a href="#format_composite_type">Composite type descriptors</a></li>
- <li><a href="#format_subrange">Subrange descriptors</a></li>
- <li><a href="#format_enumeration">Enumerator descriptors</a></li>
- <li><a href="#format_variables">Local variables</a></li>
- </ul></li>
- <li><a href="#format_common_intrinsics">Debugger intrinsic functions</a>
- <ul>
- <li><a href="#format_common_declare">llvm.dbg.declare</a></li>
- <li><a href="#format_common_value">llvm.dbg.value</a></li>
- </ul></li>
- </ol></li>
- <li><a href="#format_common_lifetime">Object lifetimes and scoping</a></li>
- <li><a href="#ccxx_frontend">C/C++ front-end specific debug information</a>
- <ol>
- <li><a href="#ccxx_compile_units">C/C++ source file information</a></li>
- <li><a href="#ccxx_global_variable">C/C++ global variable information</a></li>
- <li><a href="#ccxx_subprogram">C/C++ function information</a></li>
- <li><a href="#ccxx_basic_types">C/C++ basic types</a></li>
- <li><a href="#ccxx_derived_types">C/C++ derived types</a></li>
- <li><a href="#ccxx_composite_types">C/C++ struct/union types</a></li>
- <li><a href="#ccxx_enumeration_types">C/C++ enumeration types</a></li>
- </ol></li>
- <li><a href="#llvmdwarfextension">LLVM Dwarf Extensions</a>
- <ol>
- <li><a href="#objcproperty">Debugging Information Extension
- for Objective C Properties</a>
- <ul>
- <li><a href="#objcpropertyintroduction">Introduction</a></li>
- <li><a href="#objcpropertyproposal">Proposal</a></li>
- <li><a href="#objcpropertynewattributes">New DWARF Attributes</a></li>
- <li><a href="#objcpropertynewconstants">New DWARF Constants</a></li>
- </ul>
- </li>
- <li><a href="#acceltable">Name Accelerator Tables</a>
- <ul>
- <li><a href="#acceltableintroduction">Introduction</a></li>
- <li><a href="#acceltablehashes">Hash Tables</a></li>
- <li><a href="#acceltabledetails">Details</a></li>
- <li><a href="#acceltablecontents">Contents</a></li>
- <li><a href="#acceltableextensions">Language Extensions and File Format Changes</a></li>
- </ul>
- </li>
- </ol>
- </li>
-</ul>
-</td>
-</tr></table>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
- and <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document is the central repository for all information pertaining to
- debug information in LLVM. It describes the <a href="#format">actual format
- that the LLVM debug information</a> takes, which is useful for those
- interested in creating front-ends or dealing directly with the information.
- Further, this document provides specific examples of what debug information
- for C/C++ looks like.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="phil">Philosophy behind LLVM debugging information</a>
-</h3>
-
-<div>
-
-<p>The idea of the LLVM debugging information is to capture how the important
- pieces of the source-language's Abstract Syntax Tree map onto LLVM code.
- Several design aspects have shaped the solution that appears here. The
- important ones are:</p>
-
-<ul>
- <li>Debugging information should have very little impact on the rest of the
- compiler. No transformations, analyses, or code generators should need to
- be modified because of debugging information.</li>
-
- <li>LLVM optimizations should interact in <a href="#debugopt">well-defined and
- easily described ways</a> with the debugging information.</li>
-
- <li>Because LLVM is designed to support arbitrary programming languages,
- LLVM-to-LLVM tools should not need to know anything about the semantics of
- the source-level-language.</li>
-
- <li>Source-level languages are often <b>widely</b> different from one another.
- LLVM should not put any restrictions on the flavor of the source-language,
- and the debugging information should work with any language.</li>
-
- <li>With code generator support, it should be possible to use an LLVM compiler
- to compile a program to native machine code and standard debugging
- formats. This allows compatibility with traditional machine-code level
- debuggers, like GDB or DBX.</li>
-</ul>
-
-<p>The approach used by the LLVM implementation is to use a small set
- of <a href="#format_common_intrinsics">intrinsic functions</a> to define a
- mapping between LLVM program objects and the source-level objects. The
- description of the source-level program is maintained in LLVM metadata
- in an <a href="#ccxx_frontend">implementation-defined format</a>
- (the C/C++ front-end currently uses working draft 7 of
- the <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">DWARF 3
- standard</a>).</p>
-
-<p>When a program is being debugged, a debugger interacts with the user and
- turns the stored debug information into source-language specific information.
- As such, a debugger must be aware of the source-language, and is thus tied to
- a specific language or family of languages.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="consumers">Debug information consumers</a>
-</h3>
-
-<div>
-
-<p>The role of debug information is to provide meta information normally
- stripped away during the compilation process. This meta information provides
- an LLVM user a relationship between generated code and the original program
- source code.</p>
-
-<p>Currently, debug information is consumed by DwarfDebug to produce dwarf
- information used by the gdb debugger. Other targets could use the same
- information to produce stabs or other debug forms.</p>
-
-<p>It would also be reasonable to use debug information to feed profiling tools
- for analysis of generated code, or, tools for reconstructing the original
- source from generated code.</p>
-
-<p>TODO - expound a bit more.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="debugopt">Debugging optimized code</a>
-</h3>
-
-<div>
-
-<p>An extremely high priority of LLVM debugging information is to make it
- interact well with optimizations and analysis. In particular, the LLVM debug
- information provides the following guarantees:</p>
-
-<ul>
- <li>LLVM debug information <b>always provides information to accurately read
- the source-level state of the program</b>, regardless of which LLVM
- optimizations have been run, and without any modification to the
- optimizations themselves. However, some optimizations may impact the
- ability to modify the current state of the program with a debugger, such
- as setting program variables, or calling functions that have been
- deleted.</li>
-
- <li>As desired, LLVM optimizations can be upgraded to be aware of the LLVM
- debugging information, allowing them to update the debugging information
- as they perform aggressive optimizations. This means that, with effort,
- the LLVM optimizers could optimize debug code just as well as non-debug
- code.</li>
-
- <li>LLVM debug information does not prevent optimizations from
- happening (for example inlining, basic block reordering/merging/cleanup,
- tail duplication, etc).</li>
-
- <li>LLVM debug information is automatically optimized along with the rest of
- the program, using existing facilities. For example, duplicate
- information is automatically merged by the linker, and unused information
- is automatically removed.</li>
-</ul>
-
-<p>Basically, the debug information allows you to compile a program with
- "<tt>-O0 -g</tt>" and get full debug information, allowing you to arbitrarily
- modify the program as it executes from a debugger. Compiling a program with
- "<tt>-O3 -g</tt>" gives you full debug information that is always available
- and accurate for reading (e.g., you get accurate stack traces despite tail
- call elimination and inlining), but you might lose the ability to modify the
- program and call functions that were optimized out of the program, or
- inlined away completely.</p>
-
-<p><a href="TestingGuide.html#quicktestsuite">LLVM test suite</a> provides a
- framework to test optimizer's handling of debugging information. It can be
- run like this:</p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
-% make TEST=dbgopt
-</pre>
-</div>
-
-<p>This will test impact of debugging information on optimization passes. If
- debugging information influences optimization passes then it will be reported
- as a failure. See <a href="TestingGuide.html">TestingGuide</a> for more
- information on LLVM test infrastructure and how to run various tests.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="format">Debugging information format</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM debugging information has been carefully designed to make it possible
- for the optimizer to optimize the program and debugging information without
- necessarily having to know anything about debugging information. In
- particular, the use of metadata avoids duplicated debugging information from
- the beginning, and the global dead code elimination pass automatically
- deletes debugging information for a function if it decides to delete the
- function. </p>
-
-<p>To do this, most of the debugging information (descriptors for types,
- variables, functions, source files, etc) is inserted by the language
- front-end in the form of LLVM metadata. </p>
-
-<p>Debug information is designed to be agnostic about the target debugger and
- debugging information representation (e.g. DWARF/Stabs/etc). It uses a
- generic pass to decode the information that represents variables, types,
- functions, namespaces, etc: this allows for arbitrary source-language
- semantics and type-systems to be used, as long as there is a module
- written for the target debugger to interpret the information. </p>
-
-<p>To provide basic functionality, the LLVM debugger does have to make some
- assumptions about the source-level language being debugged, though it keeps
- these to a minimum. The only common features that the LLVM debugger assumes
- exist are <a href="#format_files">source files</a>,
- and <a href="#format_global_variables">program objects</a>. These abstract
- objects are used by a debugger to form stack traces, show information about
- local variables, etc.</p>
-
-<p>This section of the documentation first describes the representation aspects
- common to any source-language. The <a href="#ccxx_frontend">next section</a>
- describes the data layout conventions used by the C and C++ front-ends.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="debug_info_descriptors">Debug information descriptors</a>
-</h3>
-
-<div>
-
-<p>In consideration of the complexity and volume of debug information, LLVM
- provides a specification for well formed debug descriptors. </p>
-
-<p>Consumers of LLVM debug information expect the descriptors for program
- objects to start in a canonical format, but the descriptors can include
- additional information appended at the end that is source-language
- specific. All LLVM debugging information is versioned, allowing backwards
- compatibility in the case that the core structures need to change in some
- way. Also, all debugging information objects start with a tag to indicate
- what type of object it is. The source-language is allowed to define its own
- objects, by using unreserved tag numbers. We recommend using tags in
- the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base =
- 0x1000.)</p>
-
-<p>The fields of debug descriptors used internally by LLVM
- are restricted to only the simple data types <tt>i32</tt>, <tt>i1</tt>,
- <tt>float</tt>, <tt>double</tt>, <tt>mdstring</tt> and <tt>mdnode</tt>. </p>
-
-<div class="doc_code">
-<pre>
-!1 = metadata !{
- i32, ;; A tag
- ...
-}
-</pre>
-</div>
-
-<p><a name="LLVMDebugVersion">The first field of a descriptor is always an
- <tt>i32</tt> containing a tag value identifying the content of the
- descriptor. The remaining fields are specific to the descriptor. The values
- of tags are loosely bound to the tag values of DWARF information entries.
- However, that does not restrict the use of the information supplied to DWARF
- targets. To facilitate versioning of debug information, the tag is augmented
- with the current debug version (LLVMDebugVersion = 8 &lt;&lt; 16 or
- 0x80000 or 524288.)</a></p>
-
-<p>The details of the various descriptors follow.</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_compile_units">Compile unit descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{
- i32, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_compile_unit)
- i32, ;; Unused field.
- i32, ;; DWARF language identifier (ex. DW_LANG_C89)
- metadata, ;; Source file name
- metadata, ;; Source file directory (includes trailing slash)
- metadata ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
- i1, ;; True if this is a main compile unit.
- i1, ;; True if this is optimized.
- metadata, ;; Flags
- i32 ;; Runtime version
- metadata ;; List of enums types
- metadata ;; List of retained types
- metadata ;; List of subprograms
- metadata ;; List of global variables
-}
-</pre>
-</div>
-
-<p>These descriptors contain a source language ID for the file (we use the DWARF
- 3.0 ID numbers, such as <tt>DW_LANG_C89</tt>, <tt>DW_LANG_C_plus_plus</tt>,
- <tt>DW_LANG_Cobol74</tt>, etc), three strings describing the filename,
- working directory of the compiler, and an identifier string for the compiler
- that produced it.</p>
-
-<p>Compile unit descriptors provide the root context for objects declared in a
- specific compilation unit. File descriptors are defined using this context.
- These descriptors are collected by a named metadata
- <tt>!llvm.dbg.cu</tt>. Compile unit descriptor keeps track of subprograms,
- global variables and type information.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_files">File descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{
- i32, ;; Tag = 41 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_file_type)
- metadata, ;; Source file name
- metadata, ;; Source file directory (includes trailing slash)
- metadata ;; Unused
-}
-</pre>
-</div>
-
-<p>These descriptors contain information for a file. Global variables and top
- level functions would be defined using this context. File descriptors also
- provide context for source line correspondence. </p>
-
-<p>Each input file is encoded as a separate file descriptor in LLVM debugging
- information output. </p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_global_variables">Global variable descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!1 = metadata !{
- i32, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_variable)
- i32, ;; Unused field.
- metadata, ;; Reference to context descriptor
- metadata, ;; Name
- metadata, ;; Display name (fully qualified C++ name)
- metadata, ;; MIPS linkage name (for C++)
- metadata, ;; Reference to file where defined
- i32, ;; Line number where defined
- metadata, ;; Reference to type descriptor
- i1, ;; True if the global is local to compile unit (static)
- i1, ;; True if the global is defined in the compile unit (not extern)
- {}* ;; Reference to the global variable
-}
-</pre>
-</div>
-
-<p>These descriptors provide debug information about global variables. They
-provide details such as name, type and where the variable is defined. All
-global variables are collected inside the named metadata
-<tt>!llvm.dbg.cu</tt>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_subprograms">Subprogram descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32, ;; Tag = 46 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_subprogram)
- i32, ;; Unused field.
- metadata, ;; Reference to context descriptor
- metadata, ;; Name
- metadata, ;; Display name (fully qualified C++ name)
- metadata, ;; MIPS linkage name (for C++)
- metadata, ;; Reference to file where defined
- i32, ;; Line number where defined
- metadata, ;; Reference to type descriptor
- i1, ;; True if the global is local to compile unit (static)
- i1, ;; True if the global is defined in the compile unit (not extern)
- i32, ;; Line number where the scope of the subprogram begins
- i32, ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
- i32, ;; Index into a virtual function
- metadata, ;; indicates which base type contains the vtable pointer for the
- ;; derived class
- i32, ;; Flags - Artificial, Private, Protected, Explicit, Prototyped.
- i1, ;; isOptimized
- Function *,;; Pointer to LLVM function
- metadata, ;; Lists function template parameters
- metadata ;; Function declaration descriptor
- metadata ;; List of function variables
-}
-</pre>
-</div>
-
-<p>These descriptors provide debug information about functions, methods and
- subprograms. They provide details such as name, return types and the source
- location where the subprogram is defined.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_blocks">Block descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!3 = metadata !{
- i32, ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
- metadata,;; Reference to context descriptor
- i32, ;; Line number
- i32, ;; Column number
- metadata,;; Reference to source file
- i32 ;; Unique ID to identify blocks from a template function
-}
-</pre>
-</div>
-
-<p>This descriptor provides debug information about nested blocks within a
- subprogram. The line number and column numbers are used to distinguish
- two lexical blocks at same depth. </p>
-
-<div class="doc_code">
-<pre>
-!3 = metadata !{
- i32, ;; Tag = 11 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_lexical_block)
- metadata ;; Reference to the scope we're annotating with a file change
- metadata,;; Reference to the file the scope is enclosed in.
-}
-</pre>
-</div>
-
-<p>This descriptor provides a wrapper around a lexical scope to handle file
- changes in the middle of a lexical block.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_basic_type">Basic type descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!4 = metadata !{
- i32, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_base_type)
- metadata, ;; Reference to context
- metadata, ;; Name (may be "" for anonymous types)
- metadata, ;; Reference to file where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags
- i32 ;; DWARF type encoding
-}
-</pre>
-</div>
-
-<p>These descriptors define primitive types used in the code. Example int, bool
- and float. The context provides the scope of the type, which is usually the
- top level. Since basic types are not usually user defined the context
- and line number can be left as NULL and 0. The size, alignment and offset
- are expressed in bits and can be 64 bit values. The alignment is used to
- round the offset when embedded in a
- <a href="#format_composite_type">composite type</a> (example to keep float
- doubles on 64 bit boundaries.) The offset is the bit offset if embedded in
- a <a href="#format_composite_type">composite type</a>.</p>
-
-<p>The type encoding provides the details of the type. The values are typically
- one of the following:</p>
-
-<div class="doc_code">
-<pre>
-DW_ATE_address = 1
-DW_ATE_boolean = 2
-DW_ATE_float = 4
-DW_ATE_signed = 5
-DW_ATE_signed_char = 6
-DW_ATE_unsigned = 7
-DW_ATE_unsigned_char = 8
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_derived_type">Derived type descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!5 = metadata !{
- i32, ;; Tag (see below)
- metadata, ;; Reference to context
- metadata, ;; Name (may be "" for anonymous types)
- metadata, ;; Reference to file where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags to encode attributes, e.g. private
- metadata, ;; Reference to type derived from
- metadata, ;; (optional) Name of the Objective C property associated with
- ;; an Objective-C ivar
- metadata, ;; (optional) Name of the Objective C property getter selector.
- metadata, ;; (optional) Name of the Objective C property setter selector.
- i32 ;; (optional) Objective C property attributes.
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define types derived from other types. The
-value of the tag varies depending on the meaning. The following are possible
-tag values:</p>
-
-<div class="doc_code">
-<pre>
-DW_TAG_formal_parameter = 5
-DW_TAG_member = 13
-DW_TAG_pointer_type = 15
-DW_TAG_reference_type = 16
-DW_TAG_typedef = 22
-DW_TAG_const_type = 38
-DW_TAG_volatile_type = 53
-DW_TAG_restrict_type = 55
-</pre>
-</div>
-
-<p><tt>DW_TAG_member</tt> is used to define a member of
- a <a href="#format_composite_type">composite type</a>
- or <a href="#format_subprograms">subprogram</a>. The type of the member is
- the <a href="#format_derived_type">derived
- type</a>. <tt>DW_TAG_formal_parameter</tt> is used to define a member which
- is a formal argument of a subprogram.</p>
-
-<p><tt>DW_TAG_typedef</tt> is used to provide a name for the derived type.</p>
-
-<p><tt>DW_TAG_pointer_type</tt>, <tt>DW_TAG_reference_type</tt>,
- <tt>DW_TAG_const_type</tt>, <tt>DW_TAG_volatile_type</tt> and
- <tt>DW_TAG_restrict_type</tt> are used to qualify
- the <a href="#format_derived_type">derived type</a>. </p>
-
-<p><a href="#format_derived_type">Derived type</a> location can be determined
- from the context and line number. The size, alignment and offset are
- expressed in bits and can be 64 bit values. The alignment is used to round
- the offset when embedded in a <a href="#format_composite_type">composite
- type</a> (example to keep float doubles on 64 bit boundaries.) The offset is
- the bit offset if embedded in a <a href="#format_composite_type">composite
- type</a>.</p>
-
-<p>Note that the <tt>void *</tt> type is expressed as a type derived from NULL.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_composite_type">Composite type descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!6 = metadata !{
- i32, ;; Tag (see below)
- metadata, ;; Reference to context
- metadata, ;; Name (may be "" for anonymous types)
- metadata, ;; Reference to file where defined (may be NULL)
- i32, ;; Line number where defined (may be 0)
- i64, ;; Size in bits
- i64, ;; Alignment in bits
- i64, ;; Offset in bits
- i32, ;; Flags
- metadata, ;; Reference to type derived from
- metadata, ;; Reference to array of member descriptors
- i32 ;; Runtime languages
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define types that are composed of 0 or more
-elements. The value of the tag varies depending on the meaning. The following
-are possible tag values:</p>
-
-<div class="doc_code">
-<pre>
-DW_TAG_array_type = 1
-DW_TAG_enumeration_type = 4
-DW_TAG_structure_type = 19
-DW_TAG_union_type = 23
-DW_TAG_vector_type = 259
-DW_TAG_subroutine_type = 21
-DW_TAG_inheritance = 28
-</pre>
-</div>
-
-<p>The vector flag indicates that an array type is a native packed vector.</p>
-
-<p>The members of array types (tag = <tt>DW_TAG_array_type</tt>) or vector types
- (tag = <tt>DW_TAG_vector_type</tt>) are <a href="#format_subrange">subrange
- descriptors</a>, each representing the range of subscripts at that level of
- indexing.</p>
-
-<p>The members of enumeration types (tag = <tt>DW_TAG_enumeration_type</tt>) are
- <a href="#format_enumeration">enumerator descriptors</a>, each representing
- the definition of enumeration value for the set. All enumeration type
- descriptors are collected inside the named metadata
- <tt>!llvm.dbg.cu</tt>.</p>
-
-<p>The members of structure (tag = <tt>DW_TAG_structure_type</tt>) or union (tag
- = <tt>DW_TAG_union_type</tt>) types are any one of
- the <a href="#format_basic_type">basic</a>,
- <a href="#format_derived_type">derived</a>
- or <a href="#format_composite_type">composite</a> type descriptors, each
- representing a field member of the structure or union.</p>
-
-<p>For C++ classes (tag = <tt>DW_TAG_structure_type</tt>), member descriptors
- provide information about base classes, static members and member
- functions. If a member is a <a href="#format_derived_type">derived type
- descriptor</a> and has a tag of <tt>DW_TAG_inheritance</tt>, then the type
- represents a base class. If the member is
- a <a href="#format_global_variables">global variable descriptor</a> then it
- represents a static member. And, if the member is
- a <a href="#format_subprograms">subprogram descriptor</a> then it represents
- a member function. For static members and member
- functions, <tt>getName()</tt> returns the member's link or the C++ mangled
- name. <tt>getDisplayName()</tt> returns the simplified version of the name.</p>
-
-<p>The first member of subroutine (tag = <tt>DW_TAG_subroutine_type</tt>) type
- elements is the return type for the subroutine. The remaining elements are
- the formal arguments to the subroutine.</p>
-
-<p><a href="#format_composite_type">Composite type</a> location can be
- determined from the context and line number. The size, alignment and
- offset are expressed in bits and can be 64 bit values. The alignment is used
- to round the offset when embedded in
- a <a href="#format_composite_type">composite type</a> (as an example, to keep
- float doubles on 64 bit boundaries.) The offset is the bit offset if embedded
- in a <a href="#format_composite_type">composite type</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_subrange">Subrange descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!42 = metadata !{
- i32, ;; Tag = 33 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> (DW_TAG_subrange_type)
- i64, ;; Low value
- i64 ;; High value
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define ranges of array subscripts for an array
- <a href="#format_composite_type">composite type</a>. The low value defines
- the lower bounds typically zero for C/C++. The high value is the upper
- bounds. Values are 64 bit. High - low + 1 is the size of the array. If low
- > high the array bounds are not included in generated debugging information.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_enumeration">Enumerator descriptors</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!6 = metadata !{
- i32, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
- ;; (DW_TAG_enumerator)
- metadata, ;; Name
- i64 ;; Value
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define members of an
- enumeration <a href="#format_composite_type">composite type</a>, it
- associates the name to the value.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_variables">Local variables</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!7 = metadata !{
- i32, ;; Tag (see below)
- metadata, ;; Context
- metadata, ;; Name
- metadata, ;; Reference to file where defined
- i32, ;; 24 bit - Line number where defined
- ;; 8 bit - Argument number. 1 indicates 1st argument.
- metadata, ;; Type descriptor
- i32, ;; flags
- metadata ;; (optional) Reference to inline location
-}
-</pre>
-</div>
-
-<p>These descriptors are used to define variables local to a sub program. The
- value of the tag depends on the usage of the variable:</p>
-
-<div class="doc_code">
-<pre>
-DW_TAG_auto_variable = 256
-DW_TAG_arg_variable = 257
-DW_TAG_return_variable = 258
-</pre>
-</div>
-
-<p>An auto variable is any variable declared in the body of the function. An
- argument variable is any variable that appears as a formal argument to the
- function. A return variable is used to track the result of a function and
- has no source correspondent.</p>
-
-<p>The context is either the subprogram or block where the variable is defined.
- Name is the source variable name. Context and line indicate where the
- variable was defined. Type descriptor defines the declared type of the
- variable.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="format_common_intrinsics">Debugger intrinsic functions</a>
-</h3>
-
-<div>
-
-<p>LLVM uses several intrinsic functions (name prefixed with "llvm.dbg") to
- provide debug information at various points in generated code.</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_common_declare">llvm.dbg.declare</a>
-</h4>
-
-<div>
-<pre>
- void %<a href="#format_common_declare">llvm.dbg.declare</a>(metadata, metadata)
-</pre>
-
-<p>This intrinsic provides information about a local element (e.g., variable). The
- first argument is metadata holding the alloca for the variable. The
- second argument is metadata containing a description of the variable.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="format_common_value">llvm.dbg.value</a>
-</h4>
-
-<div>
-<pre>
- void %<a href="#format_common_value">llvm.dbg.value</a>(metadata, i64, metadata)
-</pre>
-
-<p>This intrinsic provides information when a user source variable is set to a
- new value. The first argument is the new value (wrapped as metadata). The
- second argument is the offset in the user source variable where the new value
- is written. The third argument is metadata containing a description of the
- user source variable.</p>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="format_common_lifetime">Object lifetimes and scoping</a>
-</h3>
-
-<div>
-<p>In many languages, the local variables in functions can have their lifetimes
- or scopes limited to a subset of a function. In the C family of languages,
- for example, variables are only live (readable and writable) within the
- source block that they are defined in. In functional languages, values are
- only readable after they have been defined. Though this is a very obvious
- concept, it is non-trivial to model in LLVM, because it has no notion of
- scoping in this sense, and does not want to be tied to a language's scoping
- rules.</p>
-
-<p>In order to handle this, the LLVM debug format uses the metadata attached to
- llvm instructions to encode line number and scoping information. Consider
- the following C fragment, for example:</p>
-
-<div class="doc_code">
-<pre>
-1. void foo() {
-2. int X = 21;
-3. int Y = 22;
-4. {
-5. int Z = 23;
-6. Z = X;
-7. }
-8. X = Y;
-9. }
-</pre>
-</div>
-
-<p>Compiled to LLVM, this function would be represented like this:</p>
-
-<div class="doc_code">
-<pre>
-define void @foo() nounwind ssp {
-entry:
- %X = alloca i32, align 4 ; &lt;i32*&gt; [#uses=4]
- %Y = alloca i32, align 4 ; &lt;i32*&gt; [#uses=4]
- %Z = alloca i32, align 4 ; &lt;i32*&gt; [#uses=3]
- %0 = bitcast i32* %X to {}* ; &lt;{}*&gt; [#uses=1]
- call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
- store i32 21, i32* %X, !dbg !8
- %1 = bitcast i32* %Y to {}* ; &lt;{}*&gt; [#uses=1]
- call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
- store i32 22, i32* %Y, !dbg !11
- %2 = bitcast i32* %Z to {}* ; &lt;{}*&gt; [#uses=1]
- call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
- store i32 23, i32* %Z, !dbg !15
- %tmp = load i32* %X, !dbg !16 ; &lt;i32&gt; [#uses=1]
- %tmp1 = load i32* %Y, !dbg !16 ; &lt;i32&gt; [#uses=1]
- %add = add nsw i32 %tmp, %tmp1, !dbg !16 ; &lt;i32&gt; [#uses=1]
- store i32 %add, i32* %Z, !dbg !16
- %tmp2 = load i32* %Y, !dbg !17 ; &lt;i32&gt; [#uses=1]
- store i32 %tmp2, i32* %X, !dbg !17
- ret void, !dbg !18
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!0 = metadata !{i32 459008, metadata !1, metadata !"X",
- metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
- metadata !"foo", metadata !3, i32 1, metadata !4,
- i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
- metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
- i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
- i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
-!5 = metadata !{null}
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
- i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-!7 = metadata !{i32 2, i32 7, metadata !1, null}
-!8 = metadata !{i32 2, i32 3, metadata !1, null}
-!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
- metadata !6}; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 3, i32 7, metadata !1, null}
-!11 = metadata !{i32 3, i32 3, metadata !1, null}
-!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
- metadata !6}; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
-!14 = metadata !{i32 5, i32 9, metadata !13, null}
-!15 = metadata !{i32 5, i32 5, metadata !13, null}
-!16 = metadata !{i32 6, i32 5, metadata !13, null}
-!17 = metadata !{i32 8, i32 3, metadata !1, null}
-!18 = metadata !{i32 9, i32 1, metadata !2, null}
-</pre>
-</div>
-
-<p>This example illustrates a few important details about LLVM debugging
- information. In particular, it shows how the <tt>llvm.dbg.declare</tt>
- intrinsic and location information, which are attached to an instruction,
- are applied together to allow a debugger to analyze the relationship between
- statements, variable definitions, and the code used to implement the
- function.</p>
-
-<div class="doc_code">
-<pre>
-call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
-</pre>
-</div>
-
-<p>The first intrinsic
- <tt>%<a href="#format_common_declare">llvm.dbg.declare</a></tt>
- encodes debugging information for the variable <tt>X</tt>. The metadata
- <tt>!dbg !7</tt> attached to the intrinsic provides scope information for the
- variable <tt>X</tt>.</p>
-
-<div class="doc_code">
-<pre>
-!7 = metadata !{i32 2, i32 7, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
- metadata !"foo", metadata !"foo", metadata !3, i32 1,
- metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
-</pre>
-</div>
-
-<p>Here <tt>!7</tt> is metadata providing location information. It has four
- fields: line number, column number, scope, and original scope. The original
- scope represents inline location if this instruction is inlined inside a
- caller, and is null otherwise. In this example, scope is encoded by
- <tt>!1</tt>. <tt>!1</tt> represents a lexical block inside the scope
- <tt>!2</tt>, where <tt>!2</tt> is a
- <a href="#format_subprograms">subprogram descriptor</a>. This way the
- location information attached to the intrinsics indicates that the
- variable <tt>X</tt> is declared at line number 2 at a function level scope in
- function <tt>foo</tt>.</p>
-
-<p>Now let's take another example.</p>
-
-<div class="doc_code">
-<pre>
-call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
-</pre>
-</div>
-
-<p>The second intrinsic
- <tt>%<a href="#format_common_declare">llvm.dbg.declare</a></tt>
- encodes debugging information for variable <tt>Z</tt>. The metadata
- <tt>!dbg !14</tt> attached to the intrinsic provides scope information for
- the variable <tt>Z</tt>.</p>
-
-<div class="doc_code">
-<pre>
-!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
-!14 = metadata !{i32 5, i32 9, metadata !13, null}
-</pre>
-</div>
-
-<p>Here <tt>!14</tt> indicates that <tt>Z</tt> is declared at line number 5 and
- column number 9 inside of lexical scope <tt>!13</tt>. The lexical scope
- itself resides inside of lexical scope <tt>!1</tt> described above.</p>
-
-<p>The scope information attached with each instruction provides a
- straightforward way to find instructions covered by a scope.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="ccxx_frontend">C/C++ front-end specific debug information</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The C and C++ front-ends represent information about the program in a format
- that is effectively identical
- to <a href="http://www.eagercon.com/dwarf/dwarf3std.htm">DWARF 3.0</a> in
- terms of information content. This allows code generators to trivially
- support native debuggers by generating standard dwarf information, and
- contains enough information for non-dwarf targets to translate it as
- needed.</p>
-
-<p>This section describes the forms used to represent C and C++ programs. Other
- languages could pattern themselves after this (which itself is tuned to
- representing programs in the same way that DWARF 3 does), or they could
- choose to provide completely different forms if they don't fit into the DWARF
- model. As support for debugging information gets added to the various LLVM
- source-language front-ends, the information used should be documented
- here.</p>
-
-<p>The following sections provide examples of various C/C++ constructs and the
- debug information that would best describe those constructs.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_compile_units">C/C++ source file information</a>
-</h3>
-
-<div>
-
-<p>Given the source files <tt>MySource.cpp</tt> and <tt>MyHeader.h</tt> located
- in the directory <tt>/Users/mine/sources</tt>, the following code:</p>
-
-<div class="doc_code">
-<pre>
-#include "MyHeader.h"
-
-int main(int argc, char *argv[]) {
- return 0;
-}
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-...
-;;
-;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp".
-;;
-!2 = metadata !{
- i32 524305, ;; Tag
- i32 0, ;; Unused
- i32 4, ;; Language Id
- metadata !"MySource.cpp",
- metadata !"/Users/mine/sources",
- metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
- i1 true, ;; Main Compile Unit
- i1 false, ;; Optimized compile unit
- metadata !"", ;; Compiler flags
- i32 0} ;; Runtime version
-
-;;
-;; Define the file for the file "/Users/mine/sources/MySource.cpp".
-;;
-!1 = metadata !{
- i32 524329, ;; Tag
- metadata !"MySource.cpp",
- metadata !"/Users/mine/sources",
- metadata !2 ;; Compile unit
-}
-
-;;
-;; Define the file for the file "/Users/mine/sources/MyHeader.h"
-;;
-!3 = metadata !{
- i32 524329, ;; Tag
- metadata !"MyHeader.h",
- metadata !"/Users/mine/sources",
- metadata !2 ;; Compile unit
-}
-
-...
-</pre>
-</div>
-
-<p>llvm::Instruction provides easy access to metadata attached with an
-instruction. One can extract line number information encoded in LLVM IR
-using <tt>Instruction::getMetadata()</tt> and
-<tt>DILocation::getLineNumber()</tt>.</p>
-<pre>
- if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction
- DILocation Loc(N); // DILocation is in DebugInfo.h
- unsigned Line = Loc.getLineNumber();
- StringRef File = Loc.getFilename();
- StringRef Dir = Loc.getDirectory();
- }
-</pre>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_global_variable">C/C++ global variable information</a>
-</h3>
-
-<div>
-
-<p>Given an integer global variable declared as follows:</p>
-
-<div class="doc_code">
-<pre>
-int MyGlobal = 100;
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define the global itself.
-;;
-@MyGlobal = global i32 100
-...
-;;
-;; List of debug info of globals
-;;
-!llvm.dbg.cu = !{!0}
-
-;; Define the compile unit.
-!0 = metadata !{
- i32 786449, ;; Tag
- i32 0, ;; Context
- i32 4, ;; Language
- metadata !"foo.cpp", ;; File
- metadata !"/Volumes/Data/tmp", ;; Directory
- metadata !"clang version 3.1 ", ;; Producer
- i1 true, ;; Deprecated field
- i1 false, ;; "isOptimized"?
- metadata !"", ;; Flags
- i32 0, ;; Runtime Version
- metadata !1, ;; Enum Types
- metadata !1, ;; Retained Types
- metadata !1, ;; Subprograms
- metadata !3 ;; Global Variables
-} ; [ DW_TAG_compile_unit ]
-
-;; The Array of Global Variables
-!3 = metadata !{
- metadata !4
-}
-
-!4 = metadata !{
- metadata !5
-}
-
-;;
-;; Define the global variable itself.
-;;
-!5 = metadata !{
- i32 786484, ;; Tag
- i32 0, ;; Unused
- null, ;; Unused
- metadata !"MyGlobal", ;; Name
- metadata !"MyGlobal", ;; Display Name
- metadata !"", ;; Linkage Name
- metadata !6, ;; File
- i32 1, ;; Line
- metadata !7, ;; Type
- i32 0, ;; IsLocalToUnit
- i32 1, ;; IsDefinition
- i32* @MyGlobal ;; LLVM-IR Value
-} ; [ DW_TAG_variable ]
-
-;;
-;; Define the file
-;;
-!6 = metadata !{
- i32 786473, ;; Tag
- metadata !"foo.cpp", ;; File
- metadata !"/Volumes/Data/tmp", ;; Directory
- null ;; Unused
-} ; [ DW_TAG_file_type ]
-
-;;
-;; Define the type
-;;
-!7 = metadata !{
- i32 786468, ;; Tag
- null, ;; Unused
- metadata !"int", ;; Name
- null, ;; Unused
- i32 0, ;; Line
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset
- i32 0, ;; Flags
- i32 5 ;; Encoding
-} ; [ DW_TAG_base_type ]
-
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_subprogram">C/C++ function information</a>
-</h3>
-
-<div>
-
-<p>Given a function declared as follows:</p>
-
-<div class="doc_code">
-<pre>
-int main(int argc, char *argv[]) {
- return 0;
-}
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define the anchor for subprograms. Note that the second field of the
-;; anchor is 46, which is the same as the tag for subprograms
-;; (46 = DW_TAG_subprogram.)
-;;
-!6 = metadata !{
- i32 524334, ;; Tag
- i32 0, ;; Unused
- metadata !1, ;; Context
- metadata !"main", ;; Name
- metadata !"main", ;; Display name
- metadata !"main", ;; Linkage name
- metadata !1, ;; File
- i32 1, ;; Line number
- metadata !4, ;; Type
- i1 false, ;; Is local
- i1 true, ;; Is definition
- i32 0, ;; Virtuality attribute, e.g. pure virtual function
- i32 0, ;; Index into virtual table for C++ methods
- i32 0, ;; Type that holds virtual table.
- i32 0, ;; Flags
- i1 false, ;; True if this function is optimized
- Function *, ;; Pointer to llvm::Function
- null ;; Function template parameters
-}
-;;
-;; Define the subprogram itself.
-;;
-define i32 @main(i32 %argc, i8** %argv) {
-...
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_basic_types">C/C++ basic types</a>
-</h3>
-
-<div>
-
-<p>The following are the basic type descriptors for C/C++ core types:</p>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_type_bool">bool</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"bool", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 8, ;; Size in Bits
- i64 8, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 2 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_char">char</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"char", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 8, ;; Size in Bits
- i64 8, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 6 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_char">unsigned char</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"unsigned char",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 8, ;; Size in Bits
- i64 8, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 8 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_short">short</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"short int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 16, ;; Size in Bits
- i64 16, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_short">unsigned short</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"short unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 16, ;; Size in Bits
- i64 16, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_int">int</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"int", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre></div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_int">unsigned int</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_long_long">long long</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"long long int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in Bits
- i64 64, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_unsigned_long_long">unsigned long long</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"long long unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in Bits
- i64 64, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_float">float</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"float",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 4 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="ccxx_basic_double">double</a>
-</h4>
-
-<div>
-
-<div class="doc_code">
-<pre>
-!2 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"double", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in Bits
- i64 64, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 4 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_derived_types">C/C++ derived types</a>
-</h3>
-
-<div>
-
-<p>Given the following as an example of C/C++ derived type:</p>
-
-<div class="doc_code">
-<pre>
-typedef const int *IntPtr;
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define the typedef "IntPtr".
-;;
-!2 = metadata !{
- i32 524310, ;; Tag
- metadata !1, ;; Context
- metadata !"IntPtr", ;; Name
- metadata !3, ;; File
- i32 0, ;; Line number
- i64 0, ;; Size in bits
- i64 0, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !4 ;; Derived From type
-}
-
-;;
-;; Define the pointer type.
-;;
-!4 = metadata !{
- i32 524303, ;; Tag
- metadata !1, ;; Context
- metadata !"", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 64, ;; Size in bits
- i64 64, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-;;
-;; Define the const type.
-;;
-!5 = metadata !{
- i32 524326, ;; Tag
- metadata !1, ;; Context
- metadata !"", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !6 ;; Derived From type
-}
-;;
-;; Define the int type.
-;;
-!6 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"int", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_composite_types">C/C++ struct/union types</a>
-</h3>
-
-<div>
-
-<p>Given the following as an example of C/C++ struct type:</p>
-
-<div class="doc_code">
-<pre>
-struct Color {
- unsigned Red;
- unsigned Green;
- unsigned Blue;
-};
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define basic type for unsigned int.
-;;
-!5 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"unsigned int",
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 7 ;; Encoding
-}
-;;
-;; Define composite type for struct Color.
-;;
-!2 = metadata !{
- i32 524307, ;; Tag
- metadata !1, ;; Context
- metadata !"Color", ;; Name
- metadata !1, ;; Compile unit
- i32 1, ;; Line number
- i64 96, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- null, ;; Derived From
- metadata !3, ;; Elements
- i32 0 ;; Runtime Language
-}
-
-;;
-;; Define the Red field.
-;;
-!4 = metadata !{
- i32 524301, ;; Tag
- metadata !1, ;; Context
- metadata !"Red", ;; Name
- metadata !1, ;; File
- i32 2, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-
-;;
-;; Define the Green field.
-;;
-!6 = metadata !{
- i32 524301, ;; Tag
- metadata !1, ;; Context
- metadata !"Green", ;; Name
- metadata !1, ;; File
- i32 3, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 32, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-
-;;
-;; Define the Blue field.
-;;
-!7 = metadata !{
- i32 524301, ;; Tag
- metadata !1, ;; Context
- metadata !"Blue", ;; Name
- metadata !1, ;; File
- i32 4, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 64, ;; Offset in bits
- i32 0, ;; Flags
- metadata !5 ;; Derived From type
-}
-
-;;
-;; Define the array of fields used by the composite type Color.
-;;
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ccxx_enumeration_types">C/C++ enumeration types</a>
-</h3>
-
-<div>
-
-<p>Given the following as an example of C/C++ enumeration type:</p>
-
-<div class="doc_code">
-<pre>
-enum Trees {
- Spruce = 100,
- Oak = 200,
- Maple = 300
-};
-</pre>
-</div>
-
-<p>a C/C++ front-end would generate the following descriptors:</p>
-
-<div class="doc_code">
-<pre>
-;;
-;; Define composite type for enum Trees
-;;
-!2 = metadata !{
- i32 524292, ;; Tag
- metadata !1, ;; Context
- metadata !"Trees", ;; Name
- metadata !1, ;; File
- i32 1, ;; Line number
- i64 32, ;; Size in bits
- i64 32, ;; Align in bits
- i64 0, ;; Offset in bits
- i32 0, ;; Flags
- null, ;; Derived From type
- metadata !3, ;; Elements
- i32 0 ;; Runtime language
-}
-
-;;
-;; Define the array of enumerators used by composite type Trees.
-;;
-!3 = metadata !{metadata !4, metadata !5, metadata !6}
-
-;;
-;; Define Spruce enumerator.
-;;
-!4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
-
-;;
-;; Define Oak enumerator.
-;;
-!5 = metadata !{i32 524328, metadata !"Oak", i64 200}
-
-;;
-;; Define Maple enumerator.
-;;
-!6 = metadata !{i32 524328, metadata !"Maple", i64 300}
-
-</pre>
-</div>
-
-</div>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="llvmdwarfextension">Debugging information format</a>
-</h2>
-<!-- *********************************************************************** -->
-<div>
-<!-- ======================================================================= -->
-<h3>
- <a name="objcproperty">Debugging Information Extension for Objective C Properties</a>
-</h3>
-<div>
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertyintroduction">Introduction</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<p>Objective C provides a simpler way to declare and define accessor methods
-using declared properties. The language provides features to declare a
-property and to let the compiler synthesize accessor methods.
-</p>
-
-<p>The debugger lets developers inspect Objective C interfaces and their
-instance variables and class variables. However, the debugger does not know
-anything about the properties defined in Objective C interfaces. The debugger
-consumes information generated by the compiler in DWARF format. The format does
-not support encoding of Objective C properties. This proposal describes DWARF
-extensions to encode Objective C properties, which the debugger can use to let
-developers inspect Objective C properties.
-</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertyproposal">Proposal</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<p>Objective C properties exist separately from class members. A property
-can be defined only by &quot;setter&quot; and &quot;getter&quot; selectors, and
-be calculated anew on each access. Or a property can just be a direct access
-to some declared ivar. Finally it can have an ivar &quot;automatically
-synthesized&quot; for it by the compiler, in which case the property can be
-referred to in user code directly using the standard C dereference syntax as
-well as through the property &quot;dot&quot; syntax, but there is no entry in
-the @interface declaration corresponding to this ivar.
-</p>
-<p>
-To facilitate debugging these properties, we will add a new DWARF TAG into the
-DW_TAG_structure_type definition for the class to hold the description of a
-given property, and a set of DWARF attributes that provide said description.
-The property tag will also contain the name and declared type of the property.
-</p>
-<p>
-If there is a related ivar, there will also be a DWARF property attribute placed
-in the DW_TAG_member DIE for that ivar referring back to the property TAG for
-that property. And in the case where the compiler synthesizes the ivar directly,
-the compiler is expected to generate a DW_TAG_member for that ivar (with the
-DW_AT_artificial set to 1), whose name will be the name used to access this
-ivar directly in code, and with the property attribute pointing back to the
-property it is backing.
-</p>
-<p>
-The following examples will serve as illustration for our discussion:
-</p>
-
-<div class="doc_code">
-<pre>
-@interface I1 {
- int n2;
-}
-
-@property int p1;
-@property int p2;
-@end
-
-@implementation I1
-@synthesize p1;
-@synthesize p2 = n2;
-@end
-</pre>
-</div>
-
-<p>
-This produces the following DWARF (this is a &quot;pseudo dwarfdump&quot; output):
-</p>
-<div class="doc_code">
-<pre>
-0x00000100: TAG_structure_type [7] *
- AT_APPLE_runtime_class( 0x10 )
- AT_name( "I1" )
- AT_decl_file( "Objc_Property.m" )
- AT_decl_line( 3 )
-
-0x00000110: TAG_APPLE_property
- AT_name ( "p1" )
- AT_type ( {0x00000150} ( int ) )
-
-0x00000120: TAG_APPLE_property
- AT_name ( "p2" )
- AT_type ( {0x00000150} ( int ) )
-
-0x00000130: TAG_member [8]
- AT_name( "_p1" )
- AT_APPLE_property ( {0x00000110} "p1" )
- AT_type( {0x00000150} ( int ) )
- AT_artificial ( 0x1 )
-
-0x00000140: TAG_member [8]
- AT_name( "n2" )
- AT_APPLE_property ( {0x00000120} "p2" )
- AT_type( {0x00000150} ( int ) )
-
-0x00000150: AT_type( ( int ) )
-</pre>
-</div>
-
-<p> Note, the current convention is that the name of the ivar for an
-auto-synthesized property is the name of the property from which it derives with
-an underscore prepended, as is shown in the example.
-But we actually don't need to know this convention, since we are given the name
-of the ivar directly.
-</p>
-
-<p>
-Also, it is common practice in ObjC to have different property declarations in
-the @interface and @implementation - e.g. to provide a read-only property in
-the interface, and a read-write interface in the implementation. In that case,
-the compiler should emit whichever property declaration will be in force in the
-current translation unit.
-</p>
-
-<p> Developers can decorate a property with attributes which are encoded using
-DW_AT_APPLE_property_attribute.
-</p>
-
-<div class="doc_code">
-<pre>
-@property (readonly, nonatomic) int pr;
-</pre>
-</div>
-<p>
-Which produces a property tag:
-</p>
-<div class="doc_code">
-<pre>
-TAG_APPLE_property [8]
- AT_name( "pr" )
- AT_type ( {0x00000147} (int) )
- AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
-</pre>
-</div>
-
-<p> The setter and getter method names are attached to the property using
-DW_AT_APPLE_property_setter and DW_AT_APPLE_property_getter attributes.
-</p>
-<div class="doc_code">
-<pre>
-@interface I1
-@property (setter=myOwnP3Setter:) int p3;
--(void)myOwnP3Setter:(int)a;
-@end
-
-@implementation I1
-@synthesize p3;
--(void)myOwnP3Setter:(int)a{ }
-@end
-</pre>
-</div>
-
-<p>
-The DWARF for this would be:
-</p>
-<div class="doc_code">
-<pre>
-0x000003bd: TAG_structure_type [7] *
- AT_APPLE_runtime_class( 0x10 )
- AT_name( "I1" )
- AT_decl_file( "Objc_Property.m" )
- AT_decl_line( 3 )
-
-0x000003cd: TAG_APPLE_property
- AT_name ( "p3" )
- AT_APPLE_property_setter ( "myOwnP3Setter:" )
- AT_type( {0x00000147} ( int ) )
-
-0x000003f3: TAG_member [8]
- AT_name( "_p3" )
- AT_type ( {0x00000147} ( int ) )
- AT_APPLE_property ( {0x000003cd} )
- AT_artificial ( 0x1 )
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertynewtags">New DWARF Tags</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<table border="1" cellspacing="0">
- <col width="200">
- <col width="200">
- <tr>
- <th>TAG</th>
- <th>Value</th>
- </tr>
- <tr>
- <td>DW_TAG_APPLE_property</td>
- <td>0x4200</td>
- </tr>
-</table>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertynewattributes">New DWARF Attributes</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<table border="1" cellspacing="0">
- <col width="200">
- <col width="200">
- <col width="200">
- <tr>
- <th>Attribute</th>
- <th>Value</th>
- <th>Classes</th>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property</td>
- <td>0x3fed</td>
- <td>Reference</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property_getter</td>
- <td>0x3fe9</td>
- <td>String</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property_setter</td>
- <td>0x3fea</td>
- <td>String</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_property_attribute</td>
- <td>0x3feb</td>
- <td>Constant</td>
- </tr>
-</table>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h4>
- <a name="objcpropertynewconstants">New DWARF Constants</a>
-</h4>
-<!-- *********************************************************************** -->
-
-<div>
-<table border="1" cellspacing="0">
- <col width="200">
- <col width="200">
- <tr>
- <th>Name</th>
- <th>Value</th>
- </tr>
- <tr>
- <td>DW_AT_APPLE_PROPERTY_readonly</td>
- <td>0x1</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_PROPERTY_readwrite</td>
- <td>0x2</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_PROPERTY_assign</td>
- <td>0x4</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_PROPERTY_retain</td>
- <td>0x8</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_PROPERTY_copy</td>
- <td>0x10</td>
- </tr>
- <tr>
- <td>DW_AT_APPLE_PROPERTY_nonatomic</td>
- <td>0x20</td>
- </tr>
-</table>
-
-</div>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="acceltable">Name Accelerator Tables</a>
-</h3>
-<!-- ======================================================================= -->
-<div>
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltableintroduction">Introduction</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<p>The .debug_pubnames and .debug_pubtypes formats are not what a debugger
- needs. The "pub" in the section name indicates that the entries in the
- table are publicly visible names only. This means no static or hidden
- functions show up in the .debug_pubnames. No static variables or private class
- variables are in the .debug_pubtypes. Many compilers add different things to
- these tables, so we can't rely upon the contents between gcc, icc, or clang.</p>
-
-<p>The typical query given by users tends not to match up with the contents of
- these tables. For example, the DWARF spec states that "In the case of the
- name of a function member or static data member of a C++ structure, class or
- union, the name presented in the .debug_pubnames section is not the simple
- name given by the DW_AT_name attribute of the referenced debugging information
- entry, but rather the fully qualified name of the data or function member."
- So the only name in these tables for a complex C++ entry is a fully
- qualified name. Debugger users tend not to enter their search strings as
- "a::b::c(int,const Foo&) const", but rather as "c", "b::c" , or "a::b::c". So
- the name entered in the name table must be demangled in order to chop it up
- appropriately and additional names must be manually entered into the table
- to make it effective as a name lookup table for debuggers to use.</p>
-
-<p>All debuggers currently ignore the .debug_pubnames table as a result of
- its inconsistent and useless public-only name content making it a waste of
- space in the object file. These tables, when they are written to disk, are
- not sorted in any way, leaving every debugger to do its own parsing
- and sorting. These tables also include an inlined copy of the string values
- in the table itself making the tables much larger than they need to be on
- disk, especially for large C++ programs.</p>
-
-<p>Can't we just fix the sections by adding all of the names we need to this
- table? No, because that is not what the tables are defined to contain and we
- won't know the difference between the old bad tables and the new good tables.
- At best we could make our own renamed sections that contain all of the data
- we need.</p>
-
-<p>These tables are also insufficient for what a debugger like LLDB needs.
- LLDB uses clang for its expression parsing where LLDB acts as a PCH. LLDB is
- then often asked to look for type "foo" or namespace "bar", or list items in
- namespace "baz". Namespaces are not included in the pubnames or pubtypes
- tables. Since clang asks a lot of questions when it is parsing an expression,
- we need to be very fast when looking up names, as it happens a lot. Having new
- accelerator tables that are optimized for very quick lookups will benefit
- this type of debugging experience greatly.</p>
-
-<p>We would like to generate name lookup tables that can be mapped into
- memory from disk, and used as is, with little or no up-front parsing. We would
- also be able to control the exact content of these different tables so they
- contain exactly what we need. The Name Accelerator Tables were designed
- to fix these issues. In order to solve these issues we need to:</p>
-
-<ul>
- <li>Have a format that can be mapped into memory from disk and used as is</li>
- <li>Lookups should be very fast</li>
- <li>Extensible table format so these tables can be made by many producers</li>
- <li>Contain all of the names needed for typical lookups out of the box</li>
- <li>Strict rules for the contents of tables</li>
-</ul>
-
-<p>Table size is important and the accelerator table format should allow the
- reuse of strings from common string tables so the strings for the names are
- not duplicated. We also want to make sure the table is ready to be used as-is
- by simply mapping the table into memory with minimal header parsing.</p>
-
-<p>The name lookups need to be fast and optimized for the kinds of lookups
- that debuggers tend to do. Optimally we would like to touch as few parts of
- the mapped table as possible when doing a name lookup and be able to quickly
- find the name entry we are looking for, or discover there are no matches. In
- the case of debuggers we optimized for lookups that fail most of the time.</p>
-
-<p>Each table that is defined should have strict rules on exactly what is in
- the accelerator tables, and those rules should be documented so clients can rely on the content.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltablehashes">Hash Tables</a>
-</h4>
-<!-- ======================================================================= -->
-
-<div>
-<h5>Standard Hash Tables</h5>
-
-<p>Typical hash tables have a header, buckets, and each bucket points to the
-bucket contents:
-</p>
-
-<div class="doc_code">
-<pre>
-.------------.
-| HEADER |
-|------------|
-| BUCKETS |
-|------------|
-| DATA |
-`------------'
-</pre>
-</div>
-
-<p>The BUCKETS are an array of offsets to DATA for each hash:</p>
-
-<div class="doc_code">
-<pre>
-.------------.
-| 0x00001000 | BUCKETS[0]
-| 0x00002000 | BUCKETS[1]
-| 0x00002200 | BUCKETS[2]
-| 0x000034f0 | BUCKETS[3]
-| | ...
-| 0xXXXXXXXX | BUCKETS[n_buckets]
-'------------'
-</pre>
-</div>
-
-<p>So for bucket[3] in the example above, we have an offset into the table
- 0x000034f0 which points to a chain of entries for the bucket. Each bucket
- must contain a next pointer, full 32 bit hash value, the string itself,
- and the data for the current string value.</p>
-
-<div class="doc_code">
-<pre>
- .------------.
-0x000034f0: | 0x00003500 | next pointer
- | 0x12345678 | 32 bit hash
- | "erase" | string value
- | data[n] | HashData for this bucket
- |------------|
-0x00003500: | 0x00003550 | next pointer
- | 0x29273623 | 32 bit hash
- | "dump" | string value
- | data[n] | HashData for this bucket
- |------------|
-0x00003550: | 0x00000000 | next pointer
- | 0x82638293 | 32 bit hash
- | "main" | string value
- | data[n] | HashData for this bucket
- `------------'
-</pre>
-</div>
-
-<p>The problem with this layout for debuggers is that we need to optimize for
- the negative lookup case where the symbol we're searching for is not present.
- So if we were to look up "printf" in the table above, we would make a 32 bit hash
- for "printf", it might match bucket[3]. We would need to go to the offset
- 0x000034f0 and start looking to see if our 32 bit hash matches. To do so, we
- need to read the next pointer, then read the hash, compare it, and skip to
- the next bucket. Each time we are skipping many bytes in memory and touching
- new cache pages just to do the compare on the full 32 bit hash. All of these
- accesses then tell us that we didn't have a match.</p>
-
-<h5>Name Hash Tables</h5>
-
-<p>To solve the issues mentioned above we have structured the hash tables
- a bit differently: a header, buckets, an array of all unique 32 bit hash
- values, followed by an array of hash value data offsets, one for each hash
- value, then the data for all hash values:</p>
-
-<div class="doc_code">
-<pre>
-.-------------.
-| HEADER |
-|-------------|
-| BUCKETS |
-|-------------|
-| HASHES |
-|-------------|
-| OFFSETS |
-|-------------|
-| DATA |
-`-------------'
-</pre>
-</div>
-
-<p>The BUCKETS in the name tables are an index into the HASHES array. By
- making all of the full 32 bit hash values contiguous in memory, we allow
- ourselves to efficiently check for a match while touching as little
- memory as possible. Most often checking the 32 bit hash values is as far as
- the lookup goes. If it does match, it usually is a match with no collisions.
- So for a table with "n_buckets" buckets, and "n_hashes" unique 32 bit hash
- values, we can clarify the contents of the BUCKETS, HASHES and OFFSETS as:</p>
-
-<div class="doc_code">
-<pre>
-.-------------------------.
-| HEADER.magic | uint32_t
-| HEADER.version | uint16_t
-| HEADER.hash_function | uint16_t
-| HEADER.bucket_count | uint32_t
-| HEADER.hashes_count | uint32_t
-| HEADER.header_data_len | uint32_t
-| HEADER_DATA | HeaderData
-|-------------------------|
-| BUCKETS | uint32_t[bucket_count] // 32 bit hash indexes
-|-------------------------|
-| HASHES | uint32_t[hashes_count] // 32 bit hash values
-|-------------------------|
-| OFFSETS | uint32_t[hashes_count] // 32 bit offsets to hash value data
-|-------------------------|
-| ALL HASH DATA |
-`-------------------------'
-</pre>
-</div>
-
-<p>So taking the exact same data from the standard hash example above we end up
- with:</p>
-
-<div class="doc_code">
-<pre>
- .------------.
- | HEADER |
- |------------|
- | 0 | BUCKETS[0]
- | 2 | BUCKETS[1]
- | 5 | BUCKETS[2]
- | 6 | BUCKETS[3]
- | | ...
- | ... | BUCKETS[n_buckets]
- |------------|
- | 0x........ | HASHES[0]
- | 0x........ | HASHES[1]
- | 0x........ | HASHES[2]
- | 0x........ | HASHES[3]
- | 0x........ | HASHES[4]
- | 0x........ | HASHES[5]
- | 0x12345678 | HASHES[6] hash for BUCKETS[3]
- | 0x29273623 | HASHES[7] hash for BUCKETS[3]
- | 0x82638293 | HASHES[8] hash for BUCKETS[3]
- | 0x........ | HASHES[9]
- | 0x........ | HASHES[10]
- | 0x........ | HASHES[11]
- | 0x........ | HASHES[12]
- | 0x........ | HASHES[13]
- | 0x........ | HASHES[n_hashes]
- |------------|
- | 0x........ | OFFSETS[0]
- | 0x........ | OFFSETS[1]
- | 0x........ | OFFSETS[2]
- | 0x........ | OFFSETS[3]
- | 0x........ | OFFSETS[4]
- | 0x........ | OFFSETS[5]
- | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3]
- | 0x00003500 | OFFSETS[7] offset for BUCKETS[3]
- | 0x00003550 | OFFSETS[8] offset for BUCKETS[3]
- | 0x........ | OFFSETS[9]
- | 0x........ | OFFSETS[10]
- | 0x........ | OFFSETS[11]
- | 0x........ | OFFSETS[12]
- | 0x........ | OFFSETS[13]
- | 0x........ | OFFSETS[n_hashes]
- |------------|
- | |
- | |
- | |
- | |
- | |
- |------------|
-0x000034f0: | 0x00001203 | String offset into .debug_str ("erase")
- | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x........ | HashData[2]
- | 0x........ | HashData[3]
- | 0x00000000 | String offset into .debug_str (terminate data for hash)
- |------------|
-0x00003500: | 0x00001203 | String offset into .debug_str ("collision")
- | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x00001203 | String offset into .debug_str ("dump")
- | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x........ | HashData[2]
- | 0x00000000 | String offset into .debug_str (terminate data for hash)
- |------------|
-0x00003550: | 0x00001203 | String offset into .debug_str ("main")
- | 0x00000009 | A 32 bit array count - number of HashData with name "main"
- | 0x........ | HashData[0]
- | 0x........ | HashData[1]
- | 0x........ | HashData[2]
- | 0x........ | HashData[3]
- | 0x........ | HashData[4]
- | 0x........ | HashData[5]
- | 0x........ | HashData[6]
- | 0x........ | HashData[7]
- | 0x........ | HashData[8]
- | 0x00000000 | String offset into .debug_str (terminate data for hash)
- `------------'
-</pre>
-</div>
-
-<p>So we still have all of the same data, we just organize it more efficiently
- for debugger lookup. If we repeat the same "printf" lookup from above, we
- would hash "printf" and find it matches BUCKETS[3] by taking the 32 bit hash
- value and modulo it by n_buckets. BUCKETS[3] contains "6" which is the index
- into the HASHES table. We would then compare any consecutive 32 bit hash
- values in the HASHES array as long as the hashes would be in BUCKETS[3]. We
- do this by verifying that each subsequent hash value modulo n_buckets is still
- 3. In the case of a failed lookup we would access the memory for BUCKETS[3], and
- then compare a few consecutive 32 bit hashes before we know that we have no match.
- We don't end up marching through multiple words of memory and we really keep the
- number of processor data cache lines being accessed as small as possible.</p>
-
-<p>The string hash that is used for these lookup tables is the Daniel J.
- Bernstein hash which is also used in the ELF GNU_HASH sections. It is a very
- good hash for all kinds of names in programs with very few hash collisions.</p>
-
-<p>Empty buckets are designated by using an invalid hash index of UINT32_MAX.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltabledetails">Details</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<p>These name hash tables are designed to be generic where specializations of
- the table get to define additional data that goes into the header
- ("HeaderData"), how the string value is stored ("KeyType") and the content
- of the data for each hash value.</p>
-
-<h5>Header Layout</h5>
-<p>The header has a fixed part, and the specialized part. The exact format of
- the header is:</p>
-<div class="doc_code">
-<pre>
-struct Header
-{
- uint32_t magic; // 'HASH' magic value to allow endian detection
- uint16_t version; // Version number
- uint16_t hash_function; // The hash function enumeration that was used
- uint32_t bucket_count; // The number of buckets in this hash table
- uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table
- uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
- // Specifically the length of the following HeaderData field - this does not
- // include the size of the preceding fields
- HeaderData header_data; // Implementation specific header data
-};
-</pre>
-</div>
-<p>The header starts with a 32 bit "magic" value which must be 'HASH' encoded as
- an ASCII integer. This allows the detection of the start of the hash table and
- also allows the table's byte order to be determined so the table can be
- correctly extracted. The "magic" value is followed by a 16 bit version number
- which allows the table to be revised and modified in the future. The current
- version number is 1. "hash_function" is a uint16_t enumeration that specifies
- which hash function was used to produce this table. The current values for the
- hash function enumerations include:</p>
-<div class="doc_code">
-<pre>
-enum HashFunctionType
-{
- eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
-};
-</pre>
-</div>
-<p>"bucket_count" is a 32 bit unsigned integer that represents how many buckets
- are in the BUCKETS array. "hashes_count" is the number of unique 32 bit hash
- values that are in the HASHES array, and is the same as the number of offsets
- contained in the OFFSETS array. "header_data_len" specifies the size in
- bytes of the HeaderData that is filled in by specialized versions of this
- table.</p>
-
-<h5>Fixed Lookup</h5>
-<p>The header is followed by the buckets, hashes, offsets, and hash value
- data:</p>
-<div class="doc_code">
-<pre>
-struct FixedTable
-{
- uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below
- uint32_t hashes [Header.hashes_count]; // Every unique 32 bit hash for the entire table is in this table
- uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above
-};
-</pre>
-</div>
-<p>"buckets" is an array of 32 bit indexes into the "hashes" array. The
- "hashes" array contains all of the 32 bit hash values for all names in the
- hash table. Each hash in the "hashes" table has an offset in the "offsets"
- array that points to the data for the hash value.</p>
-
-<p>This table setup makes it very easy to repurpose these tables to contain
- different data, while keeping the lookup mechanism the same for all tables.
- This layout also makes it possible to save the table to disk and map it in
- later and do very efficient name lookups with little or no parsing.</p>
-
-<p>DWARF lookup tables can be implemented in a variety of ways and can store
- a lot of information for each name. We want to make the DWARF tables
- extensible and able to store the data efficiently so we have used some of the
- DWARF features that enable efficient data storage to define exactly what kind
- of data we store for each name.</p>
-
-<p>The "HeaderData" contains a definition of the contents of each HashData
- chunk. We might want to store an offset to all of the debug information
- entries (DIEs) for each name. To keep things extensible, we create a list of
- items, or Atoms, that are contained in the data for each name. First comes the
- type of the data in each atom:</p>
-<div class="doc_code">
-<pre>
-enum AtomType
-{
- eAtomTypeNULL = 0u,
- eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
- eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that contains the item in question
- eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
- eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
- eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags
-};
-</pre>
-</div>
-<p>The enumeration values and their meanings are:</p>
-<div class="doc_code">
-<pre>
- eAtomTypeNULL - a termination atom that specifies the end of the atom list
- eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name
- eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE
- eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
- eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...)
- eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...)
-</pre>
-</div>
-<p>Then we allow each atom to define its type and how the data for that
- atom is encoded:</p>
-<div class="doc_code">
-<pre>
-struct Atom
-{
- uint16_t type; // AtomType enum value
- uint16_t form; // DWARF DW_FORM_XXX defines
-};
-</pre>
-</div>
-<p>The "form" type above is from the DWARF specification and defines the
- exact encoding of the data for the Atom type. See the DWARF specification for
- the DW_FORM_ definitions.</p>
-<div class="doc_code">
-<pre>
-struct HeaderData
-{
- uint32_t die_offset_base;
- uint32_t atom_count;
- Atom atoms[atom_count];
-};
-</pre>
-</div>
-<p>"HeaderData" defines the base DIE offset that should be added to any atoms
- that are encoded using the DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
- DW_FORM_ref8 or DW_FORM_ref_udata. It also defines what is contained in
- each "HashData" object -- Atom.form tells us how large each field will be in
- the HashData and the Atom.type tells us how this data should be interpreted.</p>
-
-<p>For the current implementations of the ".apple_names" (all functions + globals),
- the ".apple_types" (names of all types that are defined), and the
- ".apple_namespaces" (all namespaces), we currently set the Atom array to be:</p>
-<div class="doc_code">
-<pre>
-HeaderData.atom_count = 1;
-HeaderData.atoms[0].type = eAtomTypeDIEOffset;
-HeaderData.atoms[0].form = DW_FORM_data4;
-</pre>
-</div>
-<p>This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
- encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have
- multiple matching DIEs in a single file, which could come up with an inlined
- function for instance. Future tables could include more information about the
- DIE such as flags indicating if the DIE is a function, method, block,
- or inlined.</p>
-
-<p>The KeyType for the DWARF table is a 32 bit string table offset into the
- ".debug_str" table. The ".debug_str" is the string table for the DWARF which
- may already contain copies of all of the strings. This helps make sure, with
- help from the compiler, that we reuse the strings between all of the DWARF
- sections and keeps the hash table size down. Another benefit to having the
- compiler generate all strings as DW_FORM_strp in the debug info, is that
- DWARF parsing can be made much faster.</p>
-
-<p>After a lookup is made, we get an offset into the hash data. The hash data
- needs to be able to deal with 32 bit hash collisions, so the chunk of data
- at the offset in the hash data consists of a triple:</p>
-<div class="doc_code">
-<pre>
-uint32_t str_offset
-uint32_t hash_data_count
-HashData[hash_data_count]
-</pre>
-</div>
-<p>If "str_offset" is zero, then the bucket contents are done. 99.9% of the
- hash data chunks contain a single item (no 32 bit hash collision):</p>
-<div class="doc_code">
-<pre>
-.------------.
-| 0x00001023 | uint32_t KeyType (.debug_str[0x00001023] => "main")
-| 0x00000004 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x........ | uint32_t HashData[2] DIE offset
-| 0x........ | uint32_t HashData[3] DIE offset
-| 0x00000000 | uint32_t KeyType (end of hash chain)
-`------------'
-</pre>
-</div>
-<p>If there are collisions, you will have multiple valid string offsets:</p>
-<div class="doc_code">
-<pre>
-.------------.
-| 0x00001023 | uint32_t KeyType (.debug_str[0x00001023] => "main")
-| 0x00000004 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x........ | uint32_t HashData[2] DIE offset
-| 0x........ | uint32_t HashData[3] DIE offset
-| 0x00002023 | uint32_t KeyType (.debug_str[0x00002023] => "print")
-| 0x00000002 | uint32_t HashData count
-| 0x........ | uint32_t HashData[0] DIE offset
-| 0x........ | uint32_t HashData[1] DIE offset
-| 0x00000000 | uint32_t KeyType (end of hash chain)
-`------------'
-</pre>
-</div>
-<p>Current testing with real world C++ binaries has shown that there is around 1
- 32 bit hash collision per 100,000 name entries.</p>
-</div>
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltablecontents">Contents</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<p>As we said, we want to strictly define exactly what is included in the
- different tables. For DWARF, we have 3 tables: ".apple_names", ".apple_types",
- and ".apple_namespaces".</p>
-
-<p>".apple_names" sections should contain an entry for each DWARF DIE whose
- DW_TAG is a DW_TAG_label, DW_TAG_inlined_subroutine, or DW_TAG_subprogram that
- has address attributes: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges or
- DW_AT_entry_pc. It also contains DW_TAG_variable DIEs that have a DW_OP_addr
- in the location (global and static variables). All global and static variables
- should be included, including those scoped within functions and classes. For
- example using the following code:</p>
-<div class="doc_code">
-<pre>
-static int var = 0;
-
-void f ()
-{
- static int var = 0;
-}
-</pre>
-</div>
-<p>Both of the static "var" variables would be included in the table. All
- functions should emit both their full names and their basenames. For C or C++,
- the full name is the mangled name (if available) which is usually in the
- DW_AT_MIPS_linkage_name attribute, and the DW_AT_name contains the function
- basename. If global or static variables have a mangled name in a
- DW_AT_MIPS_linkage_name attribute, this should be emitted along with the
- simple name found in the DW_AT_name attribute.</p>
-
-<p>".apple_types" sections should contain an entry for each DWARF DIE whose
- tag is one of:</p>
-<ul>
- <li>DW_TAG_array_type</li>
- <li>DW_TAG_class_type</li>
- <li>DW_TAG_enumeration_type</li>
- <li>DW_TAG_pointer_type</li>
- <li>DW_TAG_reference_type</li>
- <li>DW_TAG_string_type</li>
- <li>DW_TAG_structure_type</li>
- <li>DW_TAG_subroutine_type</li>
- <li>DW_TAG_typedef</li>
- <li>DW_TAG_union_type</li>
- <li>DW_TAG_ptr_to_member_type</li>
- <li>DW_TAG_set_type</li>
- <li>DW_TAG_subrange_type</li>
- <li>DW_TAG_base_type</li>
- <li>DW_TAG_const_type</li>
- <li>DW_TAG_constant</li>
- <li>DW_TAG_file_type</li>
- <li>DW_TAG_namelist</li>
- <li>DW_TAG_packed_type</li>
- <li>DW_TAG_volatile_type</li>
- <li>DW_TAG_restrict_type</li>
- <li>DW_TAG_interface_type</li>
- <li>DW_TAG_unspecified_type</li>
- <li>DW_TAG_shared_type</li>
-</ul>
-<p>Only entries with a DW_AT_name attribute are included, and the entry must
- not be a forward declaration (DW_AT_declaration attribute with a non-zero value).
- For example, using the following code:</p>
-<div class="doc_code">
-<pre>
-int main ()
-{
- int *b = 0;
- return *b;
-}
-</pre>
-</div>
-<p>We get a few type DIEs:</p>
-<div class="doc_code">
-<pre>
-0x00000067: TAG_base_type [5]
- AT_encoding( DW_ATE_signed )
- AT_name( "int" )
- AT_byte_size( 0x04 )
-
-0x0000006e: TAG_pointer_type [6]
- AT_type( {0x00000067} ( int ) )
- AT_byte_size( 0x08 )
-</pre>
-</div>
-<p>The DW_TAG_pointer_type is not included because it does not have a DW_AT_name.</p>
-
-<p>".apple_namespaces" section should contain all DW_TAG_namespace DIEs. If
- we run into a namespace that has no name this is an anonymous namespace,
- and the name should be output as "(anonymous namespace)" (without the quotes).
- Why? This matches the output of the abi::__cxa_demangle() that is in the standard
- C++ library that demangles mangled names.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h4>
- <a name="acceltableextensions">Language Extensions and File Format Changes</a>
-</h4>
-<!-- ======================================================================= -->
-<div>
-<h5>Objective-C Extensions</h5>
-<p>".apple_objc" section should contain all DW_TAG_subprogram DIEs for an
- Objective-C class. The name used in the hash table is the name of the
- Objective-C class itself. If the Objective-C class has a category, then an
- entry is made for both the class name without the category, and for the class
- name with the category. So if we have a DIE at offset 0x1234 with a name
- of method "-[NSString(my_additions) stringWithSpecialString:]", we would add
- an entry for "NSString" that points to DIE 0x1234, and an entry for
- "NSString(my_additions)" that points to 0x1234. This allows us to quickly
- track down all Objective-C methods for an Objective-C class when doing
- expressions. It is needed because of the dynamic nature of Objective-C where
- anyone can add methods to a class. The DWARF for Objective-C methods is also
- emitted differently from C++ classes where the methods are not usually
- contained in the class definition, they are scattered about across one or more
- compile units. Categories can also be defined in different shared libraries.
- So we need to be able to quickly find all of the methods and class functions
- given the Objective-C class name, or quickly find all methods and class
- functions for a class + category name. This table does not contain any selector
- names, it just maps Objective-C class names (or class names + category) to all
- of the methods and class functions. The selectors are added as function
- basenames in the .apple_names section.</p>
-
-<p>In the ".apple_names" section for Objective-C functions, the full name is the
- entire function name with the brackets ("-[NSString stringWithCString:]") and the
- basename is the selector only ("stringWithCString:").</p>
-
-<h5>Mach-O Changes</h5>
-<p>The section names for the apple hash tables are for non mach-o files. For
- mach-o files, the sections should be contained in the "__DWARF" segment with
- names as follows:</p>
-<ul>
- <li>".apple_names" -> "__apple_names"</li>
- <li>".apple_types" -> "__apple_types"</li>
- <li>".apple_namespaces" -> "__apple_namespac" (16 character limit)</li>
- <li> ".apple_objc" -> "__apple_objc"</li>
-</ul>
-</div>
-</div>
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst
new file mode 100644
index 0000000000..2bbf2e2c35
--- /dev/null
+++ b/docs/SourceLevelDebugging.rst
@@ -0,0 +1,2285 @@
+================================
+Source Level Debugging with LLVM
+================================
+
+.. sectionauthor:: Chris Lattner <sabre@nondot.org> and Jim Laskey <jlaskey@mac.com>
+
+.. contents::
+ :local:
+
+Introduction
+============
+
+This document is the central repository for all information pertaining to debug
+information in LLVM. It describes the :ref:`actual format that the LLVM debug
+information takes <format>`, which is useful for those interested in creating
+front-ends or dealing directly with the information. Further, this document
+provides specific examples of what debug information for C/C++ looks like.
+
+Philosophy behind LLVM debugging information
+--------------------------------------------
+
+The idea of the LLVM debugging information is to capture how the important
+pieces of the source-language's Abstract Syntax Tree map onto LLVM code.
+Several design aspects have shaped the solution that appears here. The
+important ones are:
+
+* Debugging information should have very little impact on the rest of the
+ compiler. No transformations, analyses, or code generators should need to
+ be modified because of debugging information.
+
+* LLVM optimizations should interact in :ref:`well-defined and easily described
+ ways <intro_debugopt>` with the debugging information.
+
+* Because LLVM is designed to support arbitrary programming languages,
+ LLVM-to-LLVM tools should not need to know anything about the semantics of
+ the source-level-language.
+
+* Source-level languages are often **widely** different from one another.
+ LLVM should not put any restrictions on the flavor of the source-language,
+ and the debugging information should work with any language.
+
+* With code generator support, it should be possible to use an LLVM compiler
+ to compile a program to native machine code and standard debugging
+ formats. This allows compatibility with traditional machine-code level
+ debuggers, like GDB or DBX.
+
+The approach used by the LLVM implementation is to use a small set of
+:ref:`intrinsic functions <format_common_intrinsics>` to define a mapping
+between LLVM program objects and the source-level objects. The description of
+the source-level program is maintained in LLVM metadata in an
+:ref:`implementation-defined format <ccxx_frontend>` (the C/C++ front-end
+currently uses working draft 7 of the `DWARF 3 standard
+<http://www.eagercon.com/dwarf/dwarf3std.htm>`_).
+
+When a program is being debugged, a debugger interacts with the user and turns
+the stored debug information into source-language specific information. As
+such, a debugger must be aware of the source-language, and is thus tied to a
+specific language or family of languages.
+
+Debug information consumers
+---------------------------
+
+The role of debug information is to provide meta information normally stripped
+away during the compilation process. This meta information provides an LLVM
+user a relationship between generated code and the original program source
+code.
+
+Currently, debug information is consumed by DwarfDebug to produce dwarf
+information used by the gdb debugger. Other targets could use the same
+information to produce stabs or other debug forms.
+
+It would also be reasonable to use debug information to feed profiling tools
+for analysis of generated code, or, tools for reconstructing the original
+source from generated code.
+
+TODO - expound a bit more.
+
+.. _intro_debugopt:
+
+Debugging optimized code
+------------------------
+
+An extremely high priority of LLVM debugging information is to make it interact
+well with optimizations and analysis. In particular, the LLVM debug
+information provides the following guarantees:
+
+* LLVM debug information **always provides information to accurately read
+ the source-level state of the program**, regardless of which LLVM
+ optimizations have been run, and without any modification to the
+ optimizations themselves. However, some optimizations may impact the
+ ability to modify the current state of the program with a debugger, such
+ as setting program variables, or calling functions that have been
+ deleted.
+
+* As desired, LLVM optimizations can be upgraded to be aware of the LLVM
+ debugging information, allowing them to update the debugging information
+ as they perform aggressive optimizations. This means that, with effort,
+ the LLVM optimizers could optimize debug code just as well as non-debug
+ code.
+
+* LLVM debug information does not prevent optimizations from
+ happening (for example inlining, basic block reordering/merging/cleanup,
+ tail duplication, etc).
+
+* LLVM debug information is automatically optimized along with the rest of
+ the program, using existing facilities. For example, duplicate
+ information is automatically merged by the linker, and unused information
+ is automatically removed.
+
+Basically, the debug information allows you to compile a program with
+"``-O0 -g``" and get full debug information, allowing you to arbitrarily modify
+the program as it executes from a debugger. Compiling a program with
+"``-O3 -g``" gives you full debug information that is always available and
+accurate for reading (e.g., you get accurate stack traces despite tail call
+elimination and inlining), but you might lose the ability to modify the program
+and call functions which were optimized out of the program, or inlined away
+completely.
+
+The :ref:`LLVM test suite <test-suite-quickstart>` provides a framework to test
+the optimizer's handling of debugging information. It can be run like this:
+
+.. code-block:: bash
+
+ % cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
+ % make TEST=dbgopt
+
+This will test the impact of debugging information on optimization passes. If
+debugging information influences optimization passes then it will be reported
+as a failure. See :doc:`TestingGuide` for more information on LLVM test
+infrastructure and how to run various tests.
+
+.. _format:
+
+Debugging information format
+============================
+
+LLVM debugging information has been carefully designed to make it possible for
+the optimizer to optimize the program and debugging information without
+necessarily having to know anything about debugging information. In
+particular, the use of metadata avoids duplicated debugging information from
+the beginning, and the global dead code elimination pass automatically deletes
+debugging information for a function if it decides to delete the function.
+
+To do this, most of the debugging information (descriptors for types,
+variables, functions, source files, etc) is inserted by the language front-end
+in the form of LLVM metadata.
+
+Debug information is designed to be agnostic about the target debugger and
+debugging information representation (e.g. DWARF/Stabs/etc). It uses a generic
+pass to decode the information that represents variables, types, functions,
+namespaces, etc: this allows for arbitrary source-language semantics and
+type-systems to be used, as long as there is a module written for the target
+debugger to interpret the information.
+
+To provide basic functionality, the LLVM debugger does have to make some
+assumptions about the source-level language being debugged, though it keeps
+these to a minimum. The only common features that the LLVM debugger assumes
+exist are :ref:`source files <format_files>`, and :ref:`program objects
+<format_global_variables>`. These abstract objects are used by a debugger to
+form stack traces, show information about local variables, etc.
+
+This section of the documentation first describes the representation aspects
+common to any source-language. :ref:`ccxx_frontend` describes the data layout
+conventions used by the C and C++ front-ends.
+
+Debug information descriptors
+-----------------------------
+
+In consideration of the complexity and volume of debug information, LLVM
+provides a specification for well formed debug descriptors.
+
+Consumers of LLVM debug information expect the descriptors for program objects
+to start in a canonical format, but the descriptors can include additional
+information appended at the end that is source-language specific. All LLVM
+debugging information is versioned, allowing backwards compatibility in the
+case that the core structures need to change in some way. Also, all debugging
+information objects start with a tag to indicate what type of object it is.
+The source-language is allowed to define its own objects, by using unreserved
+tag numbers. We recommend using tags in the range 0x1000 through 0x2000
+(there is a defined ``enum DW_TAG_user_base = 0x1000``.)
+
+The fields of debug descriptors used internally by LLVM are restricted to only
+the simple data types ``i32``, ``i1``, ``float``, ``double``, ``mdstring`` and
+``mdnode``.
+
+.. code-block:: llvm
+
+ !1 = metadata !{
+ i32, ;; A tag
+ ...
+ }
+
+The first field of a descriptor is always an
+``i32`` containing a tag value identifying the content of the descriptor.
+The remaining fields are specific to the descriptor. The values of tags are
+loosely bound to the tag values of DWARF information entries. However, that
+does not restrict the use of the information supplied to DWARF targets. To
+facilitate versioning of debug information, the tag is augmented with the
+current debug version (``LLVMDebugVersion = 8 << 16`` or 0x80000 or
+524288.)
+
+The details of the various descriptors follow.
+
+Compile unit descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !0 = metadata !{
+ i32, ;; Tag = 17 + LLVMDebugVersion (DW_TAG_compile_unit)
+ i32, ;; Unused field.
+ i32, ;; DWARF language identifier (ex. DW_LANG_C89)
+ metadata, ;; Source file name
+ metadata, ;; Source file directory (includes trailing slash)
+ metadata, ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
+ i1, ;; True if this is a main compile unit.
+ i1, ;; True if this is optimized.
+ metadata, ;; Flags
+ i32, ;; Runtime version
+ metadata, ;; List of enum types
+ metadata, ;; List of retained types
+ metadata, ;; List of subprograms
+ metadata ;; List of global variables
+ }
+
+These descriptors contain a source language ID for the file (we use the DWARF
+3.0 ID numbers, such as ``DW_LANG_C89``, ``DW_LANG_C_plus_plus``,
+``DW_LANG_Cobol74``, etc), three strings describing the filename, working
+directory of the compiler, and an identifier string for the compiler that
+produced it.
+
+Compile unit descriptors provide the root context for objects declared in a
+specific compilation unit. File descriptors are defined using this context.
+These descriptors are collected by a named metadata ``!llvm.dbg.cu``. Compile
+unit descriptor keeps track of subprograms, global variables and type
+information.
+
+.. _format_files:
+
+File descriptors
+^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !0 = metadata !{
+ i32, ;; Tag = 41 + LLVMDebugVersion (DW_TAG_file_type)
+ metadata, ;; Source file name
+ metadata, ;; Source file directory (includes trailing slash)
+ metadata ;; Unused
+ }
+
+These descriptors contain information for a file. Global variables and top
+level functions would be defined using this context. File descriptors also
+provide context for source line correspondence.
+
+Each input file is encoded as a separate file descriptor in LLVM debugging
+information output.
+
+.. _format_global_variables:
+
+Global variable descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !1 = metadata !{
+ i32, ;; Tag = 52 + LLVMDebugVersion (DW_TAG_variable)
+ i32, ;; Unused field.
+ metadata, ;; Reference to context descriptor
+ metadata, ;; Name
+ metadata, ;; Display name (fully qualified C++ name)
+ metadata, ;; MIPS linkage name (for C++)
+ metadata, ;; Reference to file where defined
+ i32, ;; Line number where defined
+ metadata, ;; Reference to type descriptor
+ i1, ;; True if the global is local to compile unit (static)
+ i1, ;; True if the global is defined in the compile unit (not extern)
+ {}* ;; Reference to the global variable
+ }
+
+These descriptors provide debug information about global variables. They
+provide details such as name, type and where the variable is defined. All
+global variables are collected inside the named metadata ``!llvm.dbg.cu``.
+
+.. _format_subprograms:
+
+Subprogram descriptors
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32, ;; Tag = 46 + LLVMDebugVersion (DW_TAG_subprogram)
+ i32, ;; Unused field.
+ metadata, ;; Reference to context descriptor
+ metadata, ;; Name
+ metadata, ;; Display name (fully qualified C++ name)
+ metadata, ;; MIPS linkage name (for C++)
+ metadata, ;; Reference to file where defined
+ i32, ;; Line number where defined
+ metadata, ;; Reference to type descriptor
+ i1, ;; True if the global is local to compile unit (static)
+ i1, ;; True if the global is defined in the compile unit (not extern)
+ i32, ;; Line number where the scope of the subprogram begins
+ i32, ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
+ i32, ;; Index into a virtual function
+ metadata, ;; indicates which base type contains the vtable pointer for the
+ ;; derived class
+ i32, ;; Flags - Artificial, Private, Protected, Explicit, Prototyped.
+ i1, ;; isOptimized
+ Function * , ;; Pointer to LLVM function
+ metadata, ;; Lists function template parameters
+ metadata, ;; Function declaration descriptor
+ metadata ;; List of function variables
+ }
+
+These descriptors provide debug information about functions, methods and
+subprograms. They provide details such as name, return types and the source
+location where the subprogram is defined.
+
+Block descriptors
+^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !3 = metadata !{
+ i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
+ metadata,;; Reference to context descriptor
+ i32, ;; Line number
+ i32, ;; Column number
+ metadata,;; Reference to source file
+ i32 ;; Unique ID to identify blocks from a template function
+ }
+
+This descriptor provides debug information about nested blocks within a
+subprogram. The line number and column numbers are used to distinguish two
+lexical blocks at the same depth.
+
+.. code-block:: llvm
+
+ !3 = metadata !{
+ i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
+ metadata,;; Reference to the scope we're annotating with a file change
+ metadata ;; Reference to the file the scope is enclosed in.
+ }
+
+This descriptor provides a wrapper around a lexical scope to handle file
+changes in the middle of a lexical block.
+
+.. _format_basic_type:
+
+Basic type descriptors
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !4 = metadata !{
+ i32, ;; Tag = 36 + LLVMDebugVersion (DW_TAG_base_type)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to file where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags
+ i32 ;; DWARF type encoding
+ }
+
+These descriptors define primitive types used in the code, for example ``int``,
+``bool`` and ``float``. The context provides the scope of the type, which is
+usually the top level. Since basic types are not usually user defined the
+context and line number can be left as NULL and 0. The size, alignment and
+offset are expressed in bits and can be 64 bit values. The alignment is used
+to round the offset when embedded in a :ref:`composite type
+<format_composite_type>` (example to keep float doubles on 64 bit boundaries).
+The offset is the bit offset if embedded in a :ref:`composite type
+<format_composite_type>`.
+
+The type encoding provides the details of the type. The values are typically
+one of the following:
+
+.. code-block:: llvm
+
+ DW_ATE_address = 1
+ DW_ATE_boolean = 2
+ DW_ATE_float = 4
+ DW_ATE_signed = 5
+ DW_ATE_signed_char = 6
+ DW_ATE_unsigned = 7
+ DW_ATE_unsigned_char = 8
+
+.. _format_derived_type:
+
+Derived type descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !5 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to file where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags to encode attributes, e.g. private
+ metadata, ;; Reference to type derived from
+ metadata, ;; (optional) Name of the Objective C property associated with
+ ;; an Objective-C ivar
+ metadata, ;; (optional) Name of the Objective C property getter selector.
+ metadata, ;; (optional) Name of the Objective C property setter selector.
+ i32 ;; (optional) Objective C property attributes.
+ }
+
+These descriptors are used to define types derived from other types. The value
+of the tag varies depending on the meaning. The following are possible tag
+values:
+
+.. code-block:: llvm
+
+ DW_TAG_formal_parameter = 5
+ DW_TAG_member = 13
+ DW_TAG_pointer_type = 15
+ DW_TAG_reference_type = 16
+ DW_TAG_typedef = 22
+ DW_TAG_const_type = 38
+ DW_TAG_volatile_type = 53
+ DW_TAG_restrict_type = 55
+
+``DW_TAG_member`` is used to define a member of a :ref:`composite type
+<format_composite_type>` or :ref:`subprogram <format_subprograms>`. The type
+of the member is the :ref:`derived type <format_derived_type>`.
+``DW_TAG_formal_parameter`` is used to define a member which is a formal
+argument of a subprogram.
+
+``DW_TAG_typedef`` is used to provide a name for the derived type.
+
+``DW_TAG_pointer_type``, ``DW_TAG_reference_type``, ``DW_TAG_const_type``,
+``DW_TAG_volatile_type`` and ``DW_TAG_restrict_type`` are used to qualify the
+:ref:`derived type <format_derived_type>`.
+
+:ref:`Derived type <format_derived_type>` location can be determined from the
+context and line number. The size, alignment and offset are expressed in bits
+and can be 64 bit values. The alignment is used to round the offset when
+embedded in a :ref:`composite type <format_composite_type>` (example to keep
+float doubles on 64 bit boundaries.) The offset is the bit offset if embedded
+in a :ref:`composite type <format_composite_type>`.
+
+Note that the ``void *`` type is expressed as a type derived from NULL.
+
+.. _format_composite_type:
+
+Composite type descriptors
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !6 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Reference to context
+ metadata, ;; Name (may be "" for anonymous types)
+ metadata, ;; Reference to file where defined (may be NULL)
+ i32, ;; Line number where defined (may be 0)
+ i64, ;; Size in bits
+ i64, ;; Alignment in bits
+ i64, ;; Offset in bits
+ i32, ;; Flags
+ metadata, ;; Reference to type derived from
+ metadata, ;; Reference to array of member descriptors
+ i32 ;; Runtime languages
+ }
+
+These descriptors are used to define types that are composed of 0 or more
+elements. The value of the tag varies depending on the meaning. The following
+are possible tag values:
+
+.. code-block:: llvm
+
+ DW_TAG_array_type = 1
+ DW_TAG_enumeration_type = 4
+ DW_TAG_structure_type = 19
+ DW_TAG_union_type = 23
+ DW_TAG_vector_type = 259
+ DW_TAG_subroutine_type = 21
+ DW_TAG_inheritance = 28
+
+The vector flag indicates that an array type is a native packed vector.
+
+The members of array types (tag = ``DW_TAG_array_type``) or vector types (tag =
+``DW_TAG_vector_type``) are :ref:`subrange descriptors <format_subrange>`, each
+representing the range of subscripts at that level of indexing.
+
+The members of enumeration types (tag = ``DW_TAG_enumeration_type``) are
+:ref:`enumerator descriptors <format_enumerator>`, each representing the
+definition of an enumeration value for the set. All enumeration type descriptors
+are collected inside the named metadata ``!llvm.dbg.cu``.
+
+The members of structure (tag = ``DW_TAG_structure_type``) or union (tag =
+``DW_TAG_union_type``) types are any one of the :ref:`basic
+<format_basic_type>`, :ref:`derived <format_derived_type>` or :ref:`composite
+<format_composite_type>` type descriptors, each representing a field member of
+the structure or union.
+
+For C++ classes (tag = ``DW_TAG_structure_type``), member descriptors provide
+information about base classes, static members and member functions. If a
+member is a :ref:`derived type descriptor <format_derived_type>` and has a tag
+of ``DW_TAG_inheritance``, then the type represents a base class. If the member
+is a :ref:`global variable descriptor <format_global_variables>` then it
+represents a static member. And, if the member is a :ref:`subprogram
+descriptor <format_subprograms>` then it represents a member function. For
+static members and member functions, ``getName()`` returns the member's link or
+the C++ mangled name. ``getDisplayName()`` returns the simplified version of the name.
+
+The first member of subroutine (tag = ``DW_TAG_subroutine_type``) type elements
+is the return type for the subroutine. The remaining elements are the formal
+arguments to the subroutine.
+
+:ref:`Composite type <format_composite_type>` location can be determined from
+the context and line number. The size, alignment and offset are expressed in
+bits and can be 64 bit values. The alignment is used to round the offset when
+embedded in a :ref:`composite type <format_composite_type>` (as an example, to
+keep float doubles on 64 bit boundaries). The offset is the bit offset if
+embedded in a :ref:`composite type <format_composite_type>`.
+
+.. _format_subrange:
+
+Subrange descriptors
+^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !42 = metadata !{
+ i32, ;; Tag = 33 + LLVMDebugVersion (DW_TAG_subrange_type)
+ i64, ;; Low value
+ i64 ;; High value
+ }
+
+These descriptors are used to define ranges of array subscripts for an array
+:ref:`composite type <format_composite_type>`. The low value defines the lower
+bound, typically zero for C/C++. The high value is the upper bound. Values
+are 64 bit. ``High - Low + 1`` is the size of the array. If ``Low > High``
+the array bounds are not included in generated debugging information.
+
+.. _format_enumerator:
+
+Enumerator descriptors
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !6 = metadata !{
+ i32, ;; Tag = 40 + LLVMDebugVersion (DW_TAG_enumerator)
+ metadata, ;; Name
+ i64 ;; Value
+ }
+
+These descriptors are used to define members of an enumeration :ref:`composite
+type <format_composite_type>`; each one associates a name with a value.
+
+Local variables
+^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !7 = metadata !{
+ i32, ;; Tag (see below)
+ metadata, ;; Context
+ metadata, ;; Name
+ metadata, ;; Reference to file where defined
+ i32, ;; 24 bit - Line number where defined
+ ;; 8 bit - Argument number. 1 indicates 1st argument.
+ metadata, ;; Type descriptor
+ i32, ;; flags
+ metadata ;; (optional) Reference to inline location
+ }
+
+These descriptors are used to define variables local to a subprogram. The
+value of the tag depends on the usage of the variable:
+
+.. code-block:: llvm
+
+ DW_TAG_auto_variable = 256
+ DW_TAG_arg_variable = 257
+ DW_TAG_return_variable = 258
+
+An auto variable is any variable declared in the body of the function. An
+argument variable is any variable that appears as a formal argument to the
+function. A return variable is used to track the result of a function and has
+no source correspondent.
+
+The context is either the subprogram or block where the variable is defined.
+Name is the source variable name. Context and line indicate where the variable
+was defined. Type descriptor defines the declared type of the variable.
+
+.. _format_common_intrinsics:
+
+Debugger intrinsic functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+LLVM uses several intrinsic functions (name prefixed with "``llvm.dbg``") to
+provide debug information at various points in generated code.
+
+``llvm.dbg.declare``
+^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ void %llvm.dbg.declare(metadata, metadata)
+
+This intrinsic provides information about a local element (e.g., variable).
+The first argument is metadata holding the alloca for the variable. The second
+argument is metadata containing a description of the variable.
+
+``llvm.dbg.value``
+^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ void %llvm.dbg.value(metadata, i64, metadata)
+
+This intrinsic provides information when a user source variable is set to a new
+value. The first argument is the new value (wrapped as metadata). The second
+argument is the offset in the user source variable where the new value is
+written. The third argument is metadata containing a description of the user
+source variable.
+
+Object lifetimes and scoping
+============================
+
+In many languages, the local variables in functions can have their lifetimes or
+scopes limited to a subset of a function. In the C family of languages, for
+example, variables are only live (readable and writable) within the source
+block that they are defined in. In functional languages, values are only
+readable after they have been defined. Though this is a very obvious concept,
+it is non-trivial to model in LLVM, because it has no notion of scoping in this
+sense, and does not want to be tied to a language's scoping rules.
+
+In order to handle this, the LLVM debug format uses the metadata attached to
+llvm instructions to encode line number and scoping information. Consider the
+following C fragment, for example:
+
+.. code-block:: c
+
+ 1. void foo() {
+ 2. int X = 21;
+ 3. int Y = 22;
+ 4. {
+ 5. int Z = 23;
+ 6. Z = X;
+ 7. }
+ 8. X = Y;
+ 9. }
+
+Compiled to LLVM, this function would be represented like this:
+
+.. code-block:: llvm
+
+ define void @foo() nounwind ssp {
+ entry:
+ %X = alloca i32, align 4 ; <i32*> [#uses=4]
+ %Y = alloca i32, align 4 ; <i32*> [#uses=4]
+ %Z = alloca i32, align 4 ; <i32*> [#uses=3]
+ %0 = bitcast i32* %X to {}* ; <{}*> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
+ store i32 21, i32* %X, !dbg !8
+ %1 = bitcast i32* %Y to {}* ; <{}*> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
+ store i32 22, i32* %Y, !dbg !11
+ %2 = bitcast i32* %Z to {}* ; <{}*> [#uses=1]
+ call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
+ store i32 23, i32* %Z, !dbg !15
+ %tmp = load i32* %X, !dbg !16 ; <i32> [#uses=1]
+ %tmp1 = load i32* %Y, !dbg !16 ; <i32> [#uses=1]
+ %add = add nsw i32 %tmp, %tmp1, !dbg !16 ; <i32> [#uses=1]
+ store i32 %add, i32* %Z, !dbg !16
+ %tmp2 = load i32* %Y, !dbg !17 ; <i32> [#uses=1]
+ store i32 %tmp2, i32* %X, !dbg !17
+ ret void, !dbg !18
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+ !0 = metadata !{i32 459008, metadata !1, metadata !"X",
+ metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
+ !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+ !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
+ metadata !"foo", metadata !3, i32 1, metadata !4,
+ i1 false, i1 true}; [DW_TAG_subprogram ]
+ !3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
+ metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
+ i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+ !4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
+ i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+ !5 = metadata !{null}
+ !6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
+ i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+ !7 = metadata !{i32 2, i32 7, metadata !1, null}
+ !8 = metadata !{i32 2, i32 3, metadata !1, null}
+ !9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
+ metadata !6}; [ DW_TAG_auto_variable ]
+ !10 = metadata !{i32 3, i32 7, metadata !1, null}
+ !11 = metadata !{i32 3, i32 3, metadata !1, null}
+ !12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
+ metadata !6}; [ DW_TAG_auto_variable ]
+ !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+ !14 = metadata !{i32 5, i32 9, metadata !13, null}
+ !15 = metadata !{i32 5, i32 5, metadata !13, null}
+ !16 = metadata !{i32 6, i32 5, metadata !13, null}
+ !17 = metadata !{i32 8, i32 3, metadata !1, null}
+ !18 = metadata !{i32 9, i32 1, metadata !2, null}
+
+This example illustrates a few important details about LLVM debugging
+information. In particular, it shows how the ``llvm.dbg.declare`` intrinsic and
+location information, which are attached to an instruction, are applied
+together to allow a debugger to analyze the relationship between statements,
+variable definitions, and the code used to implement the function.
+
+.. code-block:: llvm
+
+ call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
+
+The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the
+variable ``X``. The metadata ``!dbg !7`` attached to the intrinsic provides
+scope information for the variable ``X``.
+
+.. code-block:: llvm
+
+ !7 = metadata !{i32 2, i32 7, metadata !1, null}
+ !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+ !2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
+ metadata !"foo", metadata !"foo", metadata !3, i32 1,
+ metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
+
+Here ``!7`` is metadata providing location information. It has four fields:
+line number, column number, scope, and original scope. The original scope
+represents inline location if this instruction is inlined inside a caller, and
+is null otherwise. In this example, scope is encoded by ``!1``. ``!1``
+represents a lexical block inside the scope ``!2``, where ``!2`` is a
+:ref:`subprogram descriptor <format_subprograms>`. This way the location
+information attached to the intrinsics indicates that the variable ``X`` is
+declared at line number 2 at a function level scope in function ``foo``.
+
+Now let's take another example.
+
+.. code-block:: llvm
+
+ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
+
+The second intrinsic ``%llvm.dbg.declare`` encodes debugging information for
+variable ``Z``. The metadata ``!dbg !14`` attached to the intrinsic provides
+scope information for the variable ``Z``.
+
+.. code-block:: llvm
+
+ !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+ !14 = metadata !{i32 5, i32 9, metadata !13, null}
+
+Here ``!14`` indicates that ``Z`` is declared at line number 5 and
+column number 9 inside of lexical scope ``!13``. The lexical scope itself
+resides inside of lexical scope ``!1`` described above.
+
+The scope information attached with each instruction provides a straightforward
+way to find instructions covered by a scope.
+
+.. _ccxx_frontend:
+
+C/C++ front-end specific debug information
+==========================================
+
+The C and C++ front-ends represent information about the program in a format
+that is effectively identical to `DWARF 3.0
+<http://www.eagercon.com/dwarf/dwarf3std.htm>`_ in terms of information
+content. This allows code generators to trivially support native debuggers by
+generating standard dwarf information, and contains enough information for
+non-dwarf targets to translate it as needed.
+
+This section describes the forms used to represent C and C++ programs. Other
+languages could pattern themselves after this (which itself is tuned to
+representing programs in the same way that DWARF 3 does), or they could choose
+to provide completely different forms if they don't fit into the DWARF model.
+As support for debugging information gets added to the various LLVM
+source-language front-ends, the information used should be documented here.
+
+The following sections provide examples of various C/C++ constructs and the
+debug information that would best describe those constructs.
+
+C/C++ source file information
+-----------------------------
+
+Given the source files ``MySource.cpp`` and ``MyHeader.h`` located in the
+directory ``/Users/mine/sources``, the following code:
+
+.. code-block:: c
+
+ #include "MyHeader.h"
+
+ int main(int argc, char *argv[]) {
+ return 0;
+ }
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ...
+ ;;
+ ;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp".
+ ;;
+ !2 = metadata !{
+ i32 524305, ;; Tag
+ i32 0, ;; Unused
+ i32 4, ;; Language Id
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
+ metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
+ i1 true, ;; Main Compile Unit
+ i1 false, ;; Optimized compile unit
+ metadata !"", ;; Compiler flags
+ i32 0} ;; Runtime version
+
+ ;;
+ ;; Define the file for the file "/Users/mine/sources/MySource.cpp".
+ ;;
+ !1 = metadata !{
+ i32 524329, ;; Tag
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
+ metadata !2 ;; Compile unit
+ }
+
+ ;;
+ ;; Define the file for the file "/Users/mine/sources/MyHeader.h"
+ ;;
+ !3 = metadata !{
+ i32 524329, ;; Tag
+ metadata !"MyHeader.h",
+ metadata !"/Users/mine/sources",
+ metadata !2 ;; Compile unit
+ }
+
+ ...
+
+``llvm::Instruction`` provides easy access to metadata attached with an
+instruction. One can extract line number information encoded in LLVM IR using
+``Instruction::getMetadata()`` and ``DILocation::getLineNumber()``.
+
+.. code-block:: c++
+
+ if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction
+ DILocation Loc(N); // DILocation is in DebugInfo.h
+ unsigned Line = Loc.getLineNumber();
+ StringRef File = Loc.getFilename();
+ StringRef Dir = Loc.getDirectory();
+ }
+
+C/C++ global variable information
+---------------------------------
+
+Given an integer global variable declared as follows:
+
+.. code-block:: c
+
+ int MyGlobal = 100;
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define the global itself.
+ ;;
+ @MyGlobal = global i32 100
+ ...
+ ;;
+ ;; List of debug info of globals
+ ;;
+ !llvm.dbg.cu = !{!0}
+
+ ;; Define the compile unit.
+ !0 = metadata !{
+ i32 786449, ;; Tag
+ i32 0, ;; Context
+ i32 4, ;; Language
+ metadata !"foo.cpp", ;; File
+ metadata !"/Volumes/Data/tmp", ;; Directory
+ metadata !"clang version 3.1 ", ;; Producer
+ i1 true, ;; Deprecated field
+ i1 false, ;; "isOptimized"?
+ metadata !"", ;; Flags
+ i32 0, ;; Runtime Version
+ metadata !1, ;; Enum Types
+ metadata !1, ;; Retained Types
+ metadata !1, ;; Subprograms
+ metadata !3 ;; Global Variables
+ } ; [ DW_TAG_compile_unit ]
+
+ ;; The Array of Global Variables
+ !3 = metadata !{
+ metadata !4
+ }
+
+ !4 = metadata !{
+ metadata !5
+ }
+
+ ;;
+ ;; Define the global variable itself.
+ ;;
+ !5 = metadata !{
+ i32 786484, ;; Tag
+ i32 0, ;; Unused
+ null, ;; Unused
+ metadata !"MyGlobal", ;; Name
+ metadata !"MyGlobal", ;; Display Name
+ metadata !"", ;; Linkage Name
+ metadata !6, ;; File
+ i32 1, ;; Line
+ metadata !7, ;; Type
+ i32 0, ;; IsLocalToUnit
+ i32 1, ;; IsDefinition
+ i32* @MyGlobal ;; LLVM-IR Value
+ } ; [ DW_TAG_variable ]
+
+ ;;
+ ;; Define the file
+ ;;
+ !6 = metadata !{
+ i32 786473, ;; Tag
+ metadata !"foo.cpp", ;; File
+ metadata !"/Volumes/Data/tmp", ;; Directory
+ null ;; Unused
+ } ; [ DW_TAG_file_type ]
+
+ ;;
+ ;; Define the type
+ ;;
+ !7 = metadata !{
+ i32 786468, ;; Tag
+ null, ;; Unused
+ metadata !"int", ;; Name
+ null, ;; Unused
+ i32 0, ;; Line
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ } ; [ DW_TAG_base_type ]
+
+C/C++ function information
+--------------------------
+
+Given a function declared as follows:
+
+.. code-block:: c
+
+ int main(int argc, char *argv[]) {
+ return 0;
+ }
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define the anchor for subprograms. Note that the second field of the
+ ;; anchor is 46, which is the same as the tag for subprograms
+ ;; (46 = DW_TAG_subprogram.)
+ ;;
+ !6 = metadata !{
+ i32 524334, ;; Tag
+ i32 0, ;; Unused
+ metadata !1, ;; Context
+ metadata !"main", ;; Name
+ metadata !"main", ;; Display name
+ metadata !"main", ;; Linkage name
+ metadata !1, ;; File
+ i32 1, ;; Line number
+ metadata !4, ;; Type
+ i1 false, ;; Is local
+ i1 true, ;; Is definition
+ i32 0, ;; Virtuality attribute, e.g. pure virtual function
+ i32 0, ;; Index into virtual table for C++ methods
+ i32 0, ;; Type that holds virtual table.
+ i32 0, ;; Flags
+ i1 false, ;; True if this function is optimized
+ Function *, ;; Pointer to llvm::Function
+ null ;; Function template parameters
+ }
+ ;;
+ ;; Define the subprogram itself.
+ ;;
+ define i32 @main(i32 %argc, i8** %argv) {
+ ...
+ }
+
+C/C++ basic types
+-----------------
+
+The following are the basic type descriptors for C/C++ core types:
+
+bool
+^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"bool", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 2 ;; Encoding
+ }
+
+char
+^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"char", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 6 ;; Encoding
+ }
+
+unsigned char
+^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"unsigned char",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 8, ;; Size in Bits
+ i64 8, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 8 ;; Encoding
+ }
+
+short
+^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"short int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 16, ;; Size in Bits
+ i64 16, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+unsigned short
+^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"short unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 16, ;; Size in Bits
+ i64 16, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+
+int
+^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+unsigned int
+^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+
+long long
+^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"long long int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+unsigned long long
+^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"long long unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+
+float
+^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"float",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 4 ;; Encoding
+ }
+
+double
+^^^^^^
+
+.. code-block:: llvm
+
+ !2 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"double",;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in Bits
+ i64 64, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 4 ;; Encoding
+ }
+
+C/C++ derived types
+-------------------
+
+Given the following as an example of C/C++ derived type:
+
+.. code-block:: c
+
+ typedef const int *IntPtr;
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define the typedef "IntPtr".
+ ;;
+ !2 = metadata !{
+ i32 524310, ;; Tag
+ metadata !1, ;; Context
+ metadata !"IntPtr", ;; Name
+ metadata !3, ;; File
+ i32 0, ;; Line number
+ i64 0, ;; Size in bits
+ i64 0, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !4 ;; Derived From type
+ }
+ ;;
+ ;; Define the pointer type.
+ ;;
+ !4 = metadata !{
+ i32 524303, ;; Tag
+ metadata !1, ;; Context
+ metadata !"", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 64, ;; Size in bits
+ i64 64, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+ ;;
+ ;; Define the const type.
+ ;;
+ !5 = metadata !{
+ i32 524326, ;; Tag
+ metadata !1, ;; Context
+ metadata !"", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !6 ;; Derived From type
+ }
+ ;;
+ ;; Define the int type.
+ ;;
+ !6 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"int", ;; Name
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+ }
+
+C/C++ struct/union types
+------------------------
+
+Given the following as an example of C/C++ struct type:
+
+.. code-block:: c
+
+ struct Color {
+ unsigned Red;
+ unsigned Green;
+ unsigned Blue;
+ };
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define basic type for unsigned int.
+ ;;
+ !5 = metadata !{
+ i32 524324, ;; Tag
+ metadata !1, ;; Context
+ metadata !"unsigned int",
+ metadata !1, ;; File
+ i32 0, ;; Line number
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset in Bits
+ i32 0, ;; Flags
+ i32 7 ;; Encoding
+ }
+ ;;
+ ;; Define composite type for struct Color.
+ ;;
+ !2 = metadata !{
+ i32 524307, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Color", ;; Name
+ metadata !1, ;; Compile unit
+ i32 1, ;; Line number
+ i64 96, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ null, ;; Derived From
+ metadata !3, ;; Elements
+ i32 0 ;; Runtime Language
+ }
+
+ ;;
+ ;; Define the Red field.
+ ;;
+ !4 = metadata !{
+ i32 524301, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Red", ;; Name
+ metadata !1, ;; File
+ i32 2, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+
+ ;;
+ ;; Define the Green field.
+ ;;
+ !6 = metadata !{
+ i32 524301, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Green", ;; Name
+ metadata !1, ;; File
+ i32 3, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 32, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+
+ ;;
+ ;; Define the Blue field.
+ ;;
+ !7 = metadata !{
+ i32 524301, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Blue", ;; Name
+ metadata !1, ;; File
+ i32 4, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 64, ;; Offset in bits
+ i32 0, ;; Flags
+ metadata !5 ;; Derived From type
+ }
+
+ ;;
+ ;; Define the array of fields used by the composite type Color.
+ ;;
+ !3 = metadata !{metadata !4, metadata !6, metadata !7}
+
+C/C++ enumeration types
+-----------------------
+
+Given the following as an example of C/C++ enumeration type:
+
+.. code-block:: c
+
+ enum Trees {
+ Spruce = 100,
+ Oak = 200,
+ Maple = 300
+ };
+
+a C/C++ front-end would generate the following descriptors:
+
+.. code-block:: llvm
+
+ ;;
+ ;; Define composite type for enum Trees
+ ;;
+ !2 = metadata !{
+ i32 524292, ;; Tag
+ metadata !1, ;; Context
+ metadata !"Trees", ;; Name
+ metadata !1, ;; File
+ i32 1, ;; Line number
+ i64 32, ;; Size in bits
+ i64 32, ;; Align in bits
+ i64 0, ;; Offset in bits
+ i32 0, ;; Flags
+ null, ;; Derived From type
+ metadata !3, ;; Elements
+ i32 0 ;; Runtime language
+ }
+
+ ;;
+ ;; Define the array of enumerators used by composite type Trees.
+ ;;
+ !3 = metadata !{metadata !4, metadata !5, metadata !6}
+
+ ;;
+ ;; Define Spruce enumerator.
+ ;;
+ !4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
+
+ ;;
+ ;; Define Oak enumerator.
+ ;;
+ !5 = metadata !{i32 524328, metadata !"Oak", i64 200}
+
+ ;;
+ ;; Define Maple enumerator.
+ ;;
+ !6 = metadata !{i32 524328, metadata !"Maple", i64 300}
+
+Debugging information format
+============================
+
+Debugging Information Extension for Objective C Properties
+----------------------------------------------------------
+
+Introduction
+^^^^^^^^^^^^
+
+Objective C provides a simpler way to declare and define accessor methods using
+declared properties. The language provides features to declare a property and
+to let the compiler synthesize accessor methods.
+
+The debugger lets developers inspect Objective C interfaces and their instance
+variables and class variables. However, the debugger does not know anything
+about the properties defined in Objective C interfaces. The debugger consumes
+information generated by the compiler in DWARF format. The format does not support
+encoding of Objective C properties. This proposal describes DWARF extensions to
+encode Objective C properties, which the debugger can use to let developers
+inspect Objective C properties.
+
+Proposal
+^^^^^^^^
+
+Objective C properties exist separately from class members. A property can be
+defined only by "setter" and "getter" selectors, and be calculated anew on each
+access. Or a property can just be a direct access to some declared ivar.
+Finally it can have an ivar "automatically synthesized" for it by the compiler,
+in which case the property can be referred to in user code directly using the
+standard C dereference syntax as well as through the property "dot" syntax, but
+there is no entry in the ``@interface`` declaration corresponding to this ivar.
+
+To facilitate debugging these properties, we will add a new DWARF TAG into the
+``DW_TAG_structure_type`` definition for the class to hold the description of a
+given property, and a set of DWARF attributes that provide said description.
+The property tag will also contain the name and declared type of the property.
+
+If there is a related ivar, there will also be a DWARF property attribute placed
+in the ``DW_TAG_member`` DIE for that ivar referring back to the property TAG
+for that property. And in the case where the compiler synthesizes the ivar
+directly, the compiler is expected to generate a ``DW_TAG_member`` for that
+ivar (with the ``DW_AT_artificial`` set to 1), whose name will be the name used
+to access this ivar directly in code, and with the property attribute pointing
+back to the property it is backing.
+
+The following examples will serve as illustration for our discussion:
+
+.. code-block:: objc
+
+ @interface I1 {
+ int n2;
+ }
+
+ @property int p1;
+ @property int p2;
+ @end
+
+ @implementation I1
+ @synthesize p1;
+ @synthesize p2 = n2;
+ @end
+
+This produces the following DWARF (this is a "pseudo dwarfdump" output):
+
+.. code-block:: none
+
+ 0x00000100: TAG_structure_type [7] *
+ AT_APPLE_runtime_class( 0x10 )
+ AT_name( "I1" )
+ AT_decl_file( "Objc_Property.m" )
+ AT_decl_line( 3 )
+
+ 0x00000110 TAG_APPLE_property
+ AT_name ( "p1" )
+ AT_type ( {0x00000150} ( int ) )
+
+ 0x00000120: TAG_APPLE_property
+ AT_name ( "p2" )
+ AT_type ( {0x00000150} ( int ) )
+
+ 0x00000130: TAG_member [8]
+ AT_name( "_p1" )
+ AT_APPLE_property ( {0x00000110} "p1" )
+ AT_type( {0x00000150} ( int ) )
+ AT_artificial ( 0x1 )
+
+ 0x00000140: TAG_member [8]
+ AT_name( "n2" )
+ AT_APPLE_property ( {0x00000120} "p2" )
+ AT_type( {0x00000150} ( int ) )
+
+ 0x00000150: AT_type( ( int ) )
+
+Note, the current convention is that the name of the ivar for an
+auto-synthesized property is the name of the property from which it derives
+with an underscore prepended, as is shown in the example. But we actually
+don't need to know this convention, since we are given the name of the ivar
+directly.
+
+Also, it is common practice in ObjC to have different property declarations in
+the @interface and @implementation - e.g. to provide a read-only property in
+the interface, and a read-write interface in the implementation. In that case,
+the compiler should emit whichever property declaration will be in force in the
+current translation unit.
+
+Developers can decorate a property with attributes which are encoded using
+``DW_AT_APPLE_property_attribute``.
+
+.. code-block:: objc
+
+ @property (readonly, nonatomic) int pr;
+
+.. code-block:: none
+
+ TAG_APPLE_property [8]
+ AT_name( "pr" )
+ AT_type ( {0x00000147} (int) )
+ AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
+
+The setter and getter method names are attached to the property using
+``DW_AT_APPLE_property_setter`` and ``DW_AT_APPLE_property_getter`` attributes.
+
+.. code-block:: objc
+
+ @interface I1
+ @property (setter=myOwnP3Setter:) int p3;
+ -(void)myOwnP3Setter:(int)a;
+ @end
+
+ @implementation I1
+ @synthesize p3;
+ -(void)myOwnP3Setter:(int)a{ }
+ @end
+
+The DWARF for this would be:
+
+.. code-block:: none
+
+ 0x000003bd: TAG_structure_type [7] *
+ AT_APPLE_runtime_class( 0x10 )
+ AT_name( "I1" )
+ AT_decl_file( "Objc_Property.m" )
+ AT_decl_line( 3 )
+
+ 0x000003cd TAG_APPLE_property
+ AT_name ( "p3" )
+ AT_APPLE_property_setter ( "myOwnP3Setter:" )
+ AT_type( {0x00000147} ( int ) )
+
+ 0x000003f3: TAG_member [8]
+ AT_name( "_p3" )
+ AT_type ( {0x00000147} ( int ) )
+ AT_APPLE_property ( {0x000003cd} )
+ AT_artificial ( 0x1 )
+
+New DWARF Tags
+^^^^^^^^^^^^^^
+
++-----------------------+--------+
+| TAG | Value |
++=======================+========+
+| DW_TAG_APPLE_property | 0x4200 |
++-----------------------+--------+
+
+New DWARF Attributes
+^^^^^^^^^^^^^^^^^^^^
+
++--------------------------------+--------+-----------+
+| Attribute | Value | Classes |
++================================+========+===========+
+| DW_AT_APPLE_property | 0x3fed | Reference |
++--------------------------------+--------+-----------+
+| DW_AT_APPLE_property_getter | 0x3fe9 | String |
++--------------------------------+--------+-----------+
+| DW_AT_APPLE_property_setter | 0x3fea | String |
++--------------------------------+--------+-----------+
+| DW_AT_APPLE_property_attribute | 0x3feb | Constant |
++--------------------------------+--------+-----------+
+
+New DWARF Constants
+^^^^^^^^^^^^^^^^^^^
+
++--------------------------------+-------+
+| Name                           | Value |
++================================+=======+
+| DW_APPLE_PROPERTY_readonly     | 0x1   |
++--------------------------------+-------+
+| DW_APPLE_PROPERTY_readwrite    | 0x2   |
++--------------------------------+-------+
+| DW_APPLE_PROPERTY_assign       | 0x4   |
++--------------------------------+-------+
+| DW_APPLE_PROPERTY_retain       | 0x8   |
++--------------------------------+-------+
+| DW_APPLE_PROPERTY_copy         | 0x10  |
++--------------------------------+-------+
+| DW_APPLE_PROPERTY_nonatomic    | 0x20  |
++--------------------------------+-------+
+
+Name Accelerator Tables
+-----------------------
+
+Introduction
+^^^^^^^^^^^^
+
+The "``.debug_pubnames``" and "``.debug_pubtypes``" formats are not what a
+debugger needs. The "``pub``" in the section name indicates that the entries
+in the table are publicly visible names only. This means no static or hidden
+functions show up in the "``.debug_pubnames``". No static variables or private
+class variables are in the "``.debug_pubtypes``". Many compilers add different
+things to these tables, so we can't rely upon the contents between gcc, icc, or
+clang.
+
+The typical query given by users tends not to match up with the contents of
+these tables. For example, the DWARF spec states that "In the case of the name
+of a function member or static data member of a C++ structure, class or union,
+the name presented in the "``.debug_pubnames``" section is not the simple name
+given by the ``DW_AT_name`` attribute of the referenced debugging information
+entry, but rather the fully qualified name of the data or function member."
+So the only names in these tables for complex C++ entries are fully
+qualified names. Debugger users tend not to enter their search strings as
+"``a::b::c(int,const Foo&) const``", but rather as "``c``", "``b::c``", or
+"``a::b::c``". So the name entered in the name table must be demangled in
+order to chop it up appropriately and additional names must be manually entered
+into the table to make it effective as a name lookup table for debuggers to
+use.
+
+All debuggers currently ignore the "``.debug_pubnames``" table as a result of
+its inconsistent and useless public-only name content making it a waste of
+space in the object file. These tables, when they are written to disk, are not
+sorted in any way, leaving every debugger to do its own parsing and sorting.
+These tables also include an inlined copy of the string values in the table
+itself making the tables much larger than they need to be on disk, especially
+for large C++ programs.
+
+Can't we just fix the sections by adding all of the names we need to this
+table? No, because that is not what the tables are defined to contain and we
+won't know the difference between the old bad tables and the new good tables.
+At best we could make our own renamed sections that contain all of the data we
+need.
+
+These tables are also insufficient for what a debugger like LLDB needs. LLDB
+uses clang for its expression parsing where LLDB acts as a PCH. LLDB is then
+often asked to look for type "``foo``" or namespace "``bar``", or list items in
+namespace "``baz``". Namespaces are not included in the pubnames or pubtypes
+tables. Since clang asks a lot of questions when it is parsing an expression,
+we need to be very fast when looking up names, as it happens a lot. Having new
+accelerator tables that are optimized for very quick lookups will benefit this
+type of debugging experience greatly.
+
+We would like to generate name lookup tables that can be mapped into memory
+from disk, and used as is, with little or no up-front parsing. We would also
+be able to control the exact content of these different tables so they contain
+exactly what we need. The Name Accelerator Tables were designed to fix these
+issues. In order to solve these issues we need to:
+
+* Have a format that can be mapped into memory from disk and used as is
+* Lookups should be very fast
+* Extensible table format so these tables can be made by many producers
+* Contain all of the names needed for typical lookups out of the box
+* Strict rules for the contents of tables
+
+Table size is important and the accelerator table format should allow the reuse
+of strings from common string tables so the strings for the names are not
+duplicated. We also want to make sure the table is ready to be used as-is by
+simply mapping the table into memory with minimal header parsing.
+
+The name lookups need to be fast and optimized for the kinds of lookups that
+debuggers tend to do. Optimally we would like to touch as few parts of the
+mapped table as possible when doing a name lookup and be able to quickly find
+the name entry we are looking for, or discover there are no matches. In the
+case of debuggers we optimized for lookups that fail most of the time.
+
+Each table that is defined should have strict rules on exactly what is in the
+accelerator tables, and these rules should be documented so clients can rely on
+the content.
+
+Hash Tables
+^^^^^^^^^^^
+
+Standard Hash Tables
+""""""""""""""""""""
+
+Typical hash tables have a header, buckets, and each bucket points to the
+bucket contents:
+
+.. code-block:: none
+
+ .------------.
+ | HEADER |
+ |------------|
+ | BUCKETS |
+ |------------|
+ | DATA |
+ `------------'
+
+The BUCKETS are an array of offsets to DATA for each hash:
+
+.. code-block:: none
+
+ .------------.
+ | 0x00001000 | BUCKETS[0]
+ | 0x00002000 | BUCKETS[1]
+ | 0x00002200 | BUCKETS[2]
+ | 0x000034f0 | BUCKETS[3]
+ | | ...
+ | 0xXXXXXXXX | BUCKETS[n_buckets]
+ '------------'
+
+So for ``bucket[3]`` in the example above, we have an offset into the table
+0x000034f0 which points to a chain of entries for the bucket. Each bucket must
+contain a next pointer, full 32 bit hash value, the string itself, and the data
+for the current string value.
+
+.. code-block:: none
+
+ .------------.
+ 0x000034f0: | 0x00003500 | next pointer
+ | 0x12345678 | 32 bit hash
+ | "erase" | string value
+ | data[n] | HashData for this bucket
+ |------------|
+ 0x00003500: | 0x00003550 | next pointer
+ | 0x29273623 | 32 bit hash
+ | "dump" | string value
+ | data[n] | HashData for this bucket
+ |------------|
+ 0x00003550: | 0x00000000 | next pointer
+ | 0x82638293 | 32 bit hash
+ | "main" | string value
+ | data[n] | HashData for this bucket
+ `------------'
+
+The problem with this layout for debuggers is that we need to optimize for the
+negative lookup case where the symbol we're searching for is not present. So
+if we were to look up "``printf``" in the table above, we would make a 32 bit hash
+for "``printf``", it might match ``bucket[3]``. We would need to go to the
+offset 0x000034f0 and start looking to see if our 32 bit hash matches. To do
+so, we need to read the next pointer, then read the hash, compare it, and skip
+to the next bucket. Each time we are skipping many bytes in memory and
+touching new cache pages just to do the compare on the full 32 bit hash. All
+of these accesses then tell us that we didn't have a match.
+
+Name Hash Tables
+""""""""""""""""
+
+To solve the issues mentioned above we have structured the hash tables a bit
+differently: a header, buckets, an array of all unique 32 bit hash values,
+followed by an array of hash value data offsets, one for each hash value, then
+the data for all hash values:
+
+.. code-block:: none
+
+ .-------------.
+ | HEADER |
+ |-------------|
+ | BUCKETS |
+ |-------------|
+ | HASHES |
+ |-------------|
+ | OFFSETS |
+ |-------------|
+ | DATA |
+ `-------------'
+
+The ``BUCKETS`` in the name tables are an index into the ``HASHES`` array. By
+making all of the full 32 bit hash values contiguous in memory, we allow
+ourselves to efficiently check for a match while touching as little memory as
+possible. Most often checking the 32 bit hash values is as far as the lookup
+goes. If it does match, it usually is a match with no collisions. So for a
+table with "``n_buckets``" buckets, and "``n_hashes``" unique 32 bit hash
+values, we can clarify the contents of the ``BUCKETS``, ``HASHES`` and
+``OFFSETS`` as:
+
+.. code-block:: none
+
+ .-------------------------.
+ | HEADER.magic | uint32_t
+ | HEADER.version | uint16_t
+ | HEADER.hash_function | uint16_t
+ | HEADER.bucket_count | uint32_t
+ | HEADER.hashes_count | uint32_t
+ | HEADER.header_data_len | uint32_t
+ | HEADER_DATA | HeaderData
+ |-------------------------|
+ | BUCKETS | uint32_t[bucket_count] // 32 bit hash indexes
+ |-------------------------|
+ | HASHES | uint32_t[hashes_count] // 32 bit hash values
+ |-------------------------|
+ | OFFSETS | uint32_t[hashes_count] // 32 bit offsets to hash value data
+ |-------------------------|
+ | ALL HASH DATA |
+ `-------------------------'
+
+So taking the exact same data from the standard hash example above we end up
+with:
+
+.. code-block:: none
+
+ .------------.
+ | HEADER |
+ |------------|
+ | 0 | BUCKETS[0]
+ | 2 | BUCKETS[1]
+ | 5 | BUCKETS[2]
+ | 6 | BUCKETS[3]
+ | | ...
+ | ... | BUCKETS[n_buckets]
+ |------------|
+ | 0x........ | HASHES[0]
+ | 0x........ | HASHES[1]
+ | 0x........ | HASHES[2]
+ | 0x........ | HASHES[3]
+ | 0x........ | HASHES[4]
+ | 0x........ | HASHES[5]
+ | 0x12345678 | HASHES[6] hash for BUCKETS[3]
+ | 0x29273623 | HASHES[7] hash for BUCKETS[3]
+ | 0x82638293 | HASHES[8] hash for BUCKETS[3]
+ | 0x........ | HASHES[9]
+ | 0x........ | HASHES[10]
+ | 0x........ | HASHES[11]
+ | 0x........ | HASHES[12]
+ | 0x........ | HASHES[13]
+ | 0x........ | HASHES[n_hashes]
+ |------------|
+ | 0x........ | OFFSETS[0]
+ | 0x........ | OFFSETS[1]
+ | 0x........ | OFFSETS[2]
+ | 0x........ | OFFSETS[3]
+ | 0x........ | OFFSETS[4]
+ | 0x........ | OFFSETS[5]
+ | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3]
+ | 0x00003500 | OFFSETS[7] offset for BUCKETS[3]
+ | 0x00003550 | OFFSETS[8] offset for BUCKETS[3]
+ | 0x........ | OFFSETS[9]
+ | 0x........ | OFFSETS[10]
+ | 0x........ | OFFSETS[11]
+ | 0x........ | OFFSETS[12]
+ | 0x........ | OFFSETS[13]
+ | 0x........ | OFFSETS[n_hashes]
+ |------------|
+ | |
+ | |
+ | |
+ | |
+ | |
+ |------------|
+ 0x000034f0: | 0x00001203 | .debug_str ("erase")
+ | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x........ | HashData[3]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ |------------|
+ 0x00003500: | 0x00001203 | String offset into .debug_str ("collision")
+ | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x00001203 | String offset into .debug_str ("dump")
+ | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ |------------|
+ 0x00003550: | 0x00001203 | String offset into .debug_str ("main")
+ | 0x00000009 | A 32 bit array count - number of HashData with name "main"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x........ | HashData[3]
+ | 0x........ | HashData[4]
+ | 0x........ | HashData[5]
+ | 0x........ | HashData[6]
+ | 0x........ | HashData[7]
+ | 0x........ | HashData[8]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ `------------'
+
+So we still have all of the same data, we just organize it more efficiently for
+debugger lookup. If we repeat the same "``printf``" lookup from above, we
+would hash "``printf``" and find it matches ``BUCKETS[3]`` by taking the 32 bit
+hash value and modulo it by ``n_buckets``. ``BUCKETS[3]`` contains "6" which
+is the index into the ``HASHES`` table. We would then compare any consecutive
+32 bit hash values in the ``HASHES`` array as long as the hashes would be in
+``BUCKETS[3]``. We do this by verifying that each subsequent hash value modulo
+``n_buckets`` is still 3. In the case of a failed lookup we would access the
+memory for ``BUCKETS[3]``, and then compare a few consecutive 32 bit hashes
+before we know that we have no match. We don't end up marching through
+multiple words of memory and we really keep the number of processor data cache
+lines being accessed as small as possible.
+
+The string hash that is used for these lookup tables is the Daniel J.
+Bernstein hash which is also used in the ELF ``GNU_HASH`` sections. It is a
+very good hash for all kinds of names in programs with very few hash
+collisions.
+
+Empty buckets are designated by using an invalid hash index of ``UINT32_MAX``.
+
+Details
+^^^^^^^
+
+These name hash tables are designed to be generic where specializations of the
+table get to define additional data that goes into the header ("``HeaderData``"),
+how the string value is stored ("``KeyType``") and the content of the data for each
+hash value.
+
+Header Layout
+"""""""""""""
+
+The header has a fixed part, and the specialized part. The exact format of the
+header is:
+
+.. code-block:: c
+
+ struct Header
+ {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number
+ uint16_t hash_function; // The hash function enumeration that was used
+ uint32_t bucket_count; // The number of buckets in this hash table
+ uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table
+ uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
+ // Specifically the length of the following HeaderData field - this does not
+ // include the size of the preceding fields
+ HeaderData header_data; // Implementation specific header data
+ };
+
+The header starts with a 32 bit "``magic``" value which must be ``'HASH'``
+encoded as an ASCII integer. This allows the detection of the start of the
+hash table and also allows the table's byte order to be determined so the table
+can be correctly extracted. The "``magic``" value is followed by a 16 bit
+``version`` number which allows the table to be revised and modified in the
+future. The current version number is 1. ``hash_function`` is a ``uint16_t``
+enumeration that specifies which hash function was used to produce this table.
+The current values for the hash function enumerations include:
+
+.. code-block:: c
+
+ enum HashFunctionType
+ {
+ eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
+ };
+
+``bucket_count`` is a 32 bit unsigned integer that represents how many buckets
+are in the ``BUCKETS`` array. ``hashes_count`` is the number of unique 32 bit
+hash values that are in the ``HASHES`` array, and is the same as the number of
+offsets contained in the ``OFFSETS`` array. ``header_data_len`` specifies the size
+in bytes of the ``HeaderData`` that is filled in by specialized versions of
+this table.
+
+Fixed Lookup
+""""""""""""
+
+The header is followed by the buckets, hashes, offsets, and hash value data.
+
+.. code-block:: c
+
+ struct FixedTable
+ {
+ uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below
+ uint32_t hashes [Header.hashes_count]; // Every unique 32 bit hash for the entire table is in this table
+ uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above
+ };
+
+``buckets`` is an array of 32 bit indexes into the ``hashes`` array. The
+``hashes`` array contains all of the 32 bit hash values for all names in the
+hash table. Each hash in the ``hashes`` table has an offset in the ``offsets``
+array that points to the data for the hash value.
+
+This table setup makes it very easy to repurpose these tables to contain
+different data, while keeping the lookup mechanism the same for all tables.
+This layout also makes it possible to save the table to disk and map it in
+later and do very efficient name lookups with little or no parsing.
+
+DWARF lookup tables can be implemented in a variety of ways and can store a lot
+of information for each name. We want to make the DWARF tables extensible and
+able to store the data efficiently so we have used some of the DWARF features
+that enable efficient data storage to define exactly what kind of data we store
+for each name.
+
+The ``HeaderData`` contains a definition of the contents of each HashData chunk.
+We might want to store an offset to all of the debug information entries (DIEs)
+for each name. To keep things extensible, we create a list of items, or
+Atoms, that are contained in the data for each name. First comes the type of
+the data in each atom:
+
+.. code-block:: c
+
+ enum AtomType
+ {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags
+ };
+
+The enumeration values and their meanings are:
+
+.. code-block:: none
+
+ eAtomTypeNULL - a termination atom that specifies the end of the atom list
+ eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name
+ eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE
+ eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
+ eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...)
+ eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...)
+
+Then we allow each atom to specify its type and how the data for that atom
+type is encoded:
+
+.. code-block:: c
+
+ struct Atom
+ {
+ uint16_t type; // AtomType enum value
+ uint16_t form; // DWARF DW_FORM_XXX defines
+ };
+
+The ``form`` type above is from the DWARF specification and defines the exact
+encoding of the data for the Atom type. See the DWARF specification for the
+``DW_FORM_`` definitions.
+
+.. code-block:: c
+
+ struct HeaderData
+ {
+ uint32_t die_offset_base;
+ uint32_t atom_count;
+ Atom atoms[atom_count];
+ };
+
+``HeaderData`` defines the base DIE offset that should be added to any atoms
+that are encoded using the ``DW_FORM_ref1``, ``DW_FORM_ref2``,
+``DW_FORM_ref4``, ``DW_FORM_ref8`` or ``DW_FORM_ref_udata``. It also defines
+what is contained in each ``HashData`` object -- ``Atom.form`` tells us how large
+each field will be in the ``HashData`` and the ``Atom.type`` tells us how this data
+should be interpreted.
+
+For the current implementations of the "``.apple_names``" (all functions +
+globals), the "``.apple_types``" (names of all types that are defined), and
+the "``.apple_namespaces``" (all namespaces), we currently set the ``Atom``
+array to be:
+
+.. code-block:: c
+
+ HeaderData.atom_count = 1;
+ HeaderData.atoms[0].type = eAtomTypeDIEOffset;
+ HeaderData.atoms[0].form = DW_FORM_data4;
+
+This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
+encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have
+multiple matching DIEs in a single file, which could come up with an inlined
+function for instance. Future tables could include more information about the
+DIE such as flags indicating if the DIE is a function, method, block,
+or inlined.
+
+The KeyType for the DWARF table is a 32 bit string table offset into the
+".debug_str" table. The ".debug_str" is the string table for the DWARF which
+may already contain copies of all of the strings. This helps make sure, with
+help from the compiler, that we reuse the strings between all of the DWARF
+sections and keeps the hash table size down. Another benefit to having the
+compiler generate all strings as DW_FORM_strp in the debug info, is that
+DWARF parsing can be made much faster.
+
+After a lookup is made, we get an offset into the hash data. The hash data
+needs to be able to deal with 32 bit hash collisions, so the chunk of data
+at the offset in the hash data consists of a triple:
+
+.. code-block:: c
+
+ uint32_t str_offset
+ uint32_t hash_data_count
+ HashData[hash_data_count]
+
+If "str_offset" is zero, then the bucket contents are done. 99.9% of the
+hash data chunks contain a single item (no 32 bit hash collision):
+
+.. code-block:: none
+
+ .------------.
+ | 0x00001023 | uint32_t KeyType (.debug_str[0x00001023] => "main")
+ | 0x00000004 | uint32_t HashData count
+ | 0x........ | uint32_t HashData[0] DIE offset
+ | 0x........ | uint32_t HashData[1] DIE offset
+ | 0x........ | uint32_t HashData[2] DIE offset
+ | 0x........ | uint32_t HashData[3] DIE offset
+ | 0x00000000 | uint32_t KeyType (end of hash chain)
+ `------------'
+
+If there are collisions, you will have multiple valid string offsets:
+
+.. code-block:: none
+
+ .------------.
+ | 0x00001023 | uint32_t KeyType (.debug_str[0x00001023] => "main")
+ | 0x00000004 | uint32_t HashData count
+ | 0x........ | uint32_t HashData[0] DIE offset
+ | 0x........ | uint32_t HashData[1] DIE offset
+ | 0x........ | uint32_t HashData[2] DIE offset
+ | 0x........ | uint32_t HashData[3] DIE offset
+ | 0x00002023 | uint32_t KeyType (.debug_str[0x00002023] => "print")
+ | 0x00000002 | uint32_t HashData count
+ | 0x........ | uint32_t HashData[0] DIE offset
+ | 0x........ | uint32_t HashData[1] DIE offset
+ | 0x00000000 | uint32_t KeyType (end of hash chain)
+ `------------'
+
+Current testing with real world C++ binaries has shown that there is around 1
+32 bit hash collision per 100,000 name entries.
+
+Contents
+^^^^^^^^
+
+As we said, we want to strictly define exactly what is included in the
+different tables. For DWARF, we have 3 tables: "``.apple_names``",
+"``.apple_types``", and "``.apple_namespaces``".
+
+"``.apple_names``" sections should contain an entry for each DWARF DIE whose
+``DW_TAG`` is a ``DW_TAG_label``, ``DW_TAG_inlined_subroutine``, or
+``DW_TAG_subprogram`` that has address attributes: ``DW_AT_low_pc``,
+``DW_AT_high_pc``, ``DW_AT_ranges`` or ``DW_AT_entry_pc``. It also contains
+``DW_TAG_variable`` DIEs that have a ``DW_OP_addr`` in the location (global and
+static variables). All global and static variables should be included,
+including those scoped within functions and classes. For example using the
+following code:
+
+.. code-block:: c
+
+ static int var = 0;
+
+ void f ()
+ {
+ static int var = 0;
+ }
+
+Both of the static ``var`` variables would be included in the table. All
+functions should emit both their full names and their basenames. For C or C++,
+the full name is the mangled name (if available) which is usually in the
+``DW_AT_MIPS_linkage_name`` attribute, and the ``DW_AT_name`` contains the
+function basename. If global or static variables have a mangled name in a
+``DW_AT_MIPS_linkage_name`` attribute, this should be emitted along with the
+simple name found in the ``DW_AT_name`` attribute.
+
+"``.apple_types``" sections should contain an entry for each DWARF DIE whose
+tag is one of:
+
+* DW_TAG_array_type
+* DW_TAG_class_type
+* DW_TAG_enumeration_type
+* DW_TAG_pointer_type
+* DW_TAG_reference_type
+* DW_TAG_string_type
+* DW_TAG_structure_type
+* DW_TAG_subroutine_type
+* DW_TAG_typedef
+* DW_TAG_union_type
+* DW_TAG_ptr_to_member_type
+* DW_TAG_set_type
+* DW_TAG_subrange_type
+* DW_TAG_base_type
+* DW_TAG_const_type
+* DW_TAG_constant
+* DW_TAG_file_type
+* DW_TAG_namelist
+* DW_TAG_packed_type
+* DW_TAG_volatile_type
+* DW_TAG_restrict_type
+* DW_TAG_interface_type
+* DW_TAG_unspecified_type
+* DW_TAG_shared_type
+
+Only entries with a ``DW_AT_name`` attribute are included, and the entry must
+not be a forward declaration (``DW_AT_declaration`` attribute with a non-zero
+value). For example, using the following code:
+
+.. code-block:: c
+
+ int main ()
+ {
+ int *b = 0;
+ return *b;
+ }
+
+We get a few type DIEs:
+
+.. code-block:: none
+
+ 0x00000067: TAG_base_type [5]
+ AT_encoding( DW_ATE_signed )
+ AT_name( "int" )
+ AT_byte_size( 0x04 )
+
+ 0x0000006e: TAG_pointer_type [6]
+ AT_type( {0x00000067} ( int ) )
+ AT_byte_size( 0x08 )
+
+The ``DW_TAG_pointer_type`` is not included because it does not have a ``DW_AT_name``.
+
+"``.apple_namespaces``" section should contain all ``DW_TAG_namespace`` DIEs.
+If we run into a namespace that has no name this is an anonymous namespace, and
+the name should be output as "``(anonymous namespace)``" (without the quotes).
+Why? This matches the output of the ``abi::__cxa_demangle()`` that is in the
+standard C++ library that demangles mangled names.
+
+
+Language Extensions and File Format Changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Objective-C Extensions
+""""""""""""""""""""""
+
+"``.apple_objc``" section should contain all ``DW_TAG_subprogram`` DIEs for an
+Objective-C class. The name used in the hash table is the name of the
+Objective-C class itself. If the Objective-C class has a category, then an
+entry is made for both the class name without the category, and for the class
+name with the category. So if we have a DIE at offset 0x1234 with a name of
+method "``-[NSString(my_additions) stringWithSpecialString:]``", we would add
+an entry for "``NSString``" that points to DIE 0x1234, and an entry for
+"``NSString(my_additions)``" that points to 0x1234. This allows us to quickly
+track down all Objective-C methods for an Objective-C class when doing
+expressions. It is needed because of the dynamic nature of Objective-C where
+anyone can add methods to a class. The DWARF for Objective-C methods is also
+emitted differently from C++ classes: the methods are not usually
+contained in the class definition; they are scattered across one or more
+compile units. Categories can also be defined in different shared libraries.
+So we need to be able to quickly find all of the methods and class functions
+given the Objective-C class name, or quickly find all methods and class
+functions for a class + category name. This table does not contain any
+selector names, it just maps Objective-C class names (or class names +
+category) to all of the methods and class functions. The selectors are added
+as function basenames in the "``.apple_names``" section.
+
+In the "``.apple_names``" section for Objective-C functions, the full name is
+the entire function name with the brackets ("``-[NSString
+stringWithCString:]``") and the basename is the selector only
+("``stringWithCString:``").
+
+Mach-O Changes
+""""""""""""""
+
+The section names for the apple hash tables are for non-Mach-O files. For
+Mach-O files, the sections should be contained in the ``__DWARF`` segment with
+names as follows:
+
+* "``.apple_names``" -> "``__apple_names``"
+* "``.apple_types``" -> "``__apple_types``"
+* "``.apple_namespaces``" -> "``__apple_namespac``" (16 character limit)
+* "``.apple_objc``" -> "``__apple_objc``"
+
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
deleted file mode 100644
index 1ef221fa27..0000000000
--- a/docs/SystemLibrary.html
+++ /dev/null
@@ -1,316 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>System Library</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>System Library</h1>
-<ul>
- <li><a href="#abstract">Abstract</a></li>
- <li><a href="#requirements">Keeping LLVM Portable</a>
- <ol>
- <li><a href="#headers">Don't Include System Headers</a></li>
- <li><a href="#expose">Don't Expose System Headers</a></li>
- <li><a href="#c_headers">Allow Standard C Header Files</a></li>
- <li><a href="#cpp_headers">Allow Standard C++ Header Files</a></li>
- <li><a href="#highlev">High-Level Interface</a></li>
- <li><a href="#nofunc">No Exposed Functions</a></li>
- <li><a href="#nodata">No Exposed Data</a></li>
- <li><a href="#nodupl">No Duplicate Implementations</a></li>
- <li><a href="#nounused">No Unused Functionality</a></li>
- <li><a href="#virtuals">No Virtual Methods</a></li>
- <li><a href="#softerrors">Minimize Soft Errors</a></li>
- <li><a href="#throw_spec">No throw() Specifications</a></li>
- <li><a href="#organization">Code Organization</a></li>
- <li><a href="#semantics">Consistent Semantics</a></li>
- <li><a href="#bug">Tracking Bugzilla Bug: 351</a></li>
- </ol></li>
-</ul>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="abstract">Abstract</a></h2>
-<div>
- <p>This document provides some details on LLVM's System Library, located in
- the source at <tt>lib/System</tt> and <tt>include/llvm/System</tt>. The
- library's purpose is to shield LLVM from the differences between operating
- systems for the few services LLVM needs from the operating system. Much of
- LLVM is written using portability features of standard C++. However, in a few
- areas, system dependent facilities are needed and the System Library is the
- wrapper around those system calls.</p>
- <p>By centralizing LLVM's use of operating system interfaces, we make it
- possible for the LLVM tool chain and runtime libraries to be more easily
- ported to new platforms since (theoretically) only <tt>lib/System</tt> needs
- to be ported. This library also unclutters the rest of LLVM from #ifdef use
- and special cases for specific operating systems. Such uses are replaced
- with simple calls to the interfaces provided in <tt>include/llvm/System</tt>.
- </p>
- <p>Note that the System Library is not intended to be a complete operating
- system wrapper (such as the Adaptive Communications Environment (ACE) or
- Apache Portable Runtime (APR)), but only provides the functionality necessary
- to support LLVM.
- <p>The System Library was written by Reid Spencer who formulated the
- design based on similar work originating from the eXtensible Programming
- System (XPS). Several people helped with the effort; especially,
- Jeff Cohen and Henrik Bach on the Win32 port.</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="requirements">Keeping LLVM Portable</a>
-</h2>
-<div>
- <p>In order to keep LLVM portable, LLVM developers should adhere to a set of
- portability rules associated with the System Library. Adherence to these rules
- should help the System Library achieve its goal of shielding LLVM from the
- variations in operating system interfaces and doing so efficiently. The
- following sections define the rules needed to fulfill this objective.</p>
-
-<!-- ======================================================================= -->
-<h3><a name="headers">Don't Include System Headers</a></h3>
-<div>
- <p>Except in <tt>lib/System</tt>, no LLVM source code should directly
- <tt>#include</tt> a system header. Care has been taken to remove all such
- <tt>#includes</tt> from LLVM while <tt>lib/System</tt> was being
- developed. Specifically this means that header files like "unistd.h",
- "windows.h", "stdio.h", and "string.h" are forbidden to be included by LLVM
- source code outside the implementation of <tt>lib/System</tt>.</p>
- <p>To obtain system-dependent functionality, existing interfaces to the system
- found in <tt>include/llvm/System</tt> should be used. If an appropriate
- interface is not available, it should be added to <tt>include/llvm/System</tt>
- and implemented in <tt>lib/System</tt> for all supported platforms.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="expose">Don't Expose System Headers</a></h3>
-<div>
- <p>The System Library must shield LLVM from <em>all</em> system headers. To
- obtain system level functionality, LLVM source must
- <tt>#include "llvm/System/Thing.h"</tt> and nothing else. This means that
- <tt>Thing.h</tt> cannot expose any system header files. This protects LLVM
- from accidentally using system specific functionality and only allows it
- via the <tt>lib/System</tt> interface.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="c_headers">Use Standard C Headers</a></h3>
-<div>
- <p>The <em>standard</em> C headers (the ones beginning with "c") are allowed
- to be exposed through the <tt>lib/System</tt> interface. These headers and
- the things they declare are considered to be platform agnostic. LLVM source
- files may include them directly or obtain their inclusion through
- <tt>lib/System</tt> interfaces.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="cpp_headers">Use Standard C++ Headers</a></h3>
-<div>
- <p>The <em>standard</em> C++ headers from the standard C++ library and
- standard template library may be exposed through the <tt>lib/System</tt>
- interface. These headers and the things they declare are considered to be
- platform agnostic. LLVM source files may include them or obtain their
- inclusion through lib/System interfaces.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="highlev">High Level Interface</a></h3>
-<div>
- <p>The entry points specified in the interface of lib/System must be aimed at
- completing some reasonably high level task needed by LLVM. We do not want to
- simply wrap each operating system call. It would be preferable to wrap several
- operating system calls that are always used in conjunction with one another by
- LLVM.</p>
- <p>For example, consider what is needed to execute a program, wait for it to
- complete, and return its result code. On Unix, this involves the following
- operating system calls: <tt>getenv, fork, execve,</tt> and <tt>wait</tt>. The
- correct thing for lib/System to provide is a function, say
- <tt>ExecuteProgramAndWait</tt>, that implements the functionality completely.
- what we don't want is wrappers for the operating system calls involved.</p>
- <p>There must <em>not</em> be a one-to-one relationship between operating
- system calls and the System library's interface. Any such interface function
- will be suspicious.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nounused">No Unused Functionality</a></h3>
-<div>
- <p>There must be no functionality specified in the interface of lib/System
- that isn't actually used by LLVM. We're not writing a general purpose
- operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM
- doesn't need much. This design goal aims to keep the lib/System interface
- small and understandable which should foster its actual use and adoption.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nodupl">No Duplicate Implementations</a></h3>
-<div>
- <p>The implementation of a function for a given platform must be written
- exactly once. This implies that it must be possible to apply a function's
- implementation to multiple operating systems if those operating systems can
- share the same implementation. This rule applies to the set of operating
- systems supported for a given class of operating system (e.g. Unix, Win32).
- </p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="virtuals">No Virtual Methods</a></h3>
-<div>
- <p>The System Library interfaces can be called quite frequently by LLVM. In
- order to make those calls as efficient as possible, we discourage the use of
- virtual methods. There is no need to use inheritance for implementation
- differences, it just adds complexity. The <tt>#include</tt> mechanism works
- just fine.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nofunc">No Exposed Functions</a></h3>
-<div>
- <p>Any functions defined by system libraries (i.e. not defined by lib/System)
- must not be exposed through the lib/System interface, even if the header file
- for that function is not exposed. This prevents inadvertent use of system
- specific functionality.</p>
- <p>For example, the <tt>stat</tt> system call is notorious for having
- variations in the data it provides. <tt>lib/System</tt> must not declare
- <tt>stat</tt> nor allow it to be declared. Instead it should provide its own
- interface to discovering information about files and directories. Those
- interfaces may be implemented in terms of <tt>stat</tt> but that is strictly
- an implementation detail. The interface provided by the System Library must
- be implemented on all platforms (even those without <tt>stat</tt>).</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="nodata">No Exposed Data</a></h3>
-<div>
- <p>Any data defined by system libraries (i.e. not defined by lib/System) must
- not be exposed through the lib/System interface, even if the header file for
- that function is not exposed. As with functions, this prevents inadvertent use
- of data that might not exist on all platforms.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="softerrors">Minimize Soft Errors</a></h3>
-<div>
- <p>Operating system interfaces will generally provide error results for every
- little thing that could go wrong. In almost all cases, you can divide these
- error results into two groups: normal/good/soft and abnormal/bad/hard. That
- is, some of the errors are simply information like "file not found",
- "insufficient privileges", etc. while other errors are much harder like
- "out of space", "bad disk sector", or "system call interrupted". We'll call
- the first group "<i>soft</i>" errors and the second group "<i>hard</i>"
- errors.<p>
- <p>lib/System must always attempt to minimize soft errors.
- This is a design requirement because the
- minimization of soft errors can affect the granularity and the nature of the
- interface. In general, if you find that you're wanting to throw soft errors,
- you must review the granularity of the interface because it is likely you're
- trying to implement something that is too low level. The rule of thumb is to
- provide interface functions that <em>can't</em> fail, except when faced with
- hard errors.</p>
- <p>For a trivial example, suppose we wanted to add an "OpenFileForWriting"
- function. For many operating systems, if the file doesn't exist, attempting
- to open the file will produce an error. However, lib/System should not
- simply throw that error if it occurs because its a soft error. The problem
- is that the interface function, OpenFileForWriting is too low level. It should
- be OpenOrCreateFileForWriting. In the case of the soft "doesn't exist" error,
- this function would just create it and then open it for writing.</p>
- <p>This design principle needs to be maintained in lib/System because it
- avoids the propagation of soft error handling throughout the rest of LLVM.
- Hard errors will generally just cause a termination for an LLVM tool so don't
- be bashful about throwing them.</p>
- <p>Rules of thumb:</p>
- <ol>
- <li>Don't throw soft errors, only hard errors.</li>
- <li>If you're tempted to throw a soft error, re-think the interface.</li>
- <li>Handle internally the most common normal/good/soft error conditions
- so the rest of LLVM doesn't have to.</li>
- </ol>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="throw_spec">No throw Specifications</a></h3>
-<div>
- <p>None of the lib/System interface functions may be declared with C++
- <tt>throw()</tt> specifications on them. This requirement makes sure that the
- compiler does not insert additional exception handling code into the interface
- functions. This is a performance consideration: lib/System functions are at
- the bottom of many call chains and as such can be frequently called. We
- need them to be as efficient as possible. However, no routines in the
- system library should actually throw exceptions.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="organization">Code Organization</a></h3>
-<div>
- <p>Implementations of the System Library interface are separated by their
- general class of operating system. Currently only Unix and Win32 classes are
- defined but more could be added for other operating system classifications.
- To distinguish which implementation to compile, the code in lib/System uses
- the LLVM_ON_UNIX and LLVM_ON_WIN32 #defines provided via configure through the
- llvm/Config/config.h file. Each source file in lib/System, after implementing
- the generic (operating system independent) functionality needs to include the
- correct implementation using a set of <tt>#if defined(LLVM_ON_XYZ)</tt>
- directives. For example, if we had lib/System/File.cpp, we'd expect to see in
- that file:</p>
- <pre><tt>
- #if defined(LLVM_ON_UNIX)
- #include "Unix/File.cpp"
- #endif
- #if defined(LLVM_ON_WIN32)
- #include "Win32/File.cpp"
- #endif
- </tt></pre>
- <p>The implementation in lib/System/Unix/File.cpp should handle all Unix
- variants. The implementation in lib/System/Win32/File.cpp should handle all
- Win32 variants. What this does is quickly differentiate the basic class of
- operating system that will provide the implementation. The specific details
- for a given platform must still be determined through the use of
- <tt>#ifdef</tt>.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="semantics">Consistent Semantics</a></h3>
-<div>
- <p>The implementation of a lib/System interface can vary drastically between
- platforms. That's okay as long as the end result of the interface function
- is the same. For example, a function to create a directory is pretty straight
- forward on all operating system. System V IPC on the other hand isn't even
- supported on all platforms. Instead of "supporting" System V IPC, lib/System
- should provide an interface to the basic concept of inter-process
- communications. The implementations might use System V IPC if that was
- available or named pipes, or whatever gets the job done effectively for a
- given operating system. In all cases, the interface and the implementation
- must be semantically consistent. </p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="bug">Bug 351</a></h3>
-<div>
- <p>See <a href="http://llvm.org/PR351">bug 351</a>
- for further details on the progress of this work</p>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/SystemLibrary.rst b/docs/SystemLibrary.rst
new file mode 100644
index 0000000000..b54026717c
--- /dev/null
+++ b/docs/SystemLibrary.rst
@@ -0,0 +1,250 @@
+==============
+System Library
+==============
+
+.. sectionauthor:: Reid Spencer <rspencer@x10sys.com>
+
+Abstract
+========
+
+
+This document provides some details on LLVM's System Library, located in the
+source at ``lib/System`` and ``include/llvm/System``. The library's purpose is
+to shield LLVM from the differences between operating systems for the few
+services LLVM needs from the operating system. Much of LLVM is written using
+portability features of standard C++. However, in a few areas, system dependent
+facilities are needed and the System Library is the wrapper around those system
+calls.
+
+By centralizing LLVM's use of operating system interfaces, we make it possible
+for the LLVM tool chain and runtime libraries to be more easily ported to new
+platforms since (theoretically) only ``lib/System`` needs to be ported. This
+library also unclutters the rest of LLVM from #ifdef use and special cases for
+specific operating systems. Such uses are replaced with simple calls to the
+interfaces provided in ``include/llvm/System``.
+
+Note that the System Library is not intended to be a complete operating system
+wrapper (such as the Adaptive Communications Environment (ACE) or Apache
+Portable Runtime (APR)), but only provides the functionality necessary to
+support LLVM.
+
+The System Library was written by Reid Spencer who formulated the design based
+on similar work originating from the eXtensible Programming System (XPS).
+Several people helped with the effort; especially, Jeff Cohen and Henrik Bach
+on the Win32 port.
+
+Keeping LLVM Portable
+=====================
+
+In order to keep LLVM portable, LLVM developers should adhere to a set of
+portability rules associated with the System Library. Adherence to these rules
+should help the System Library achieve its goal of shielding LLVM from the
+variations in operating system interfaces and doing so efficiently. The
+following sections define the rules needed to fulfill this objective.
+
+Don't Include System Headers
+----------------------------
+
+Except in ``lib/System``, no LLVM source code should directly ``#include`` a
+system header. Care has been taken to remove all such ``#includes`` from LLVM
+while ``lib/System`` was being developed. Specifically this means that header
+files like "``unistd.h``", "``windows.h``", "``stdio.h``", and "``string.h``"
+are forbidden to be included by LLVM source code outside the implementation of
+``lib/System``.
+
+To obtain system-dependent functionality, existing interfaces to the system
+found in ``include/llvm/System`` should be used. If an appropriate interface is
+not available, it should be added to ``include/llvm/System`` and implemented in
+``lib/System`` for all supported platforms.
+
+Don't Expose System Headers
+---------------------------
+
+The System Library must shield LLVM from **all** system headers. To obtain
+system level functionality, LLVM source must ``#include "llvm/System/Thing.h"``
+and nothing else. This means that ``Thing.h`` cannot expose any system header
+files. This protects LLVM from accidentally using system specific functionality
+and only allows it via the ``lib/System`` interface.
+
+Use Standard C Headers
+----------------------
+
+The **standard** C headers (the ones beginning with "c") are allowed to be
+exposed through the ``lib/System`` interface. These headers and the things they
+declare are considered to be platform agnostic. LLVM source files may include
+them directly or obtain their inclusion through ``lib/System`` interfaces.
+
+Use Standard C++ Headers
+------------------------
+
+The **standard** C++ headers from the standard C++ library and standard
+template library may be exposed through the ``lib/System`` interface. These
+headers and the things they declare are considered to be platform agnostic.
+LLVM source files may include them or obtain their inclusion through
+``lib/System`` interfaces.
+
+High Level Interface
+--------------------
+
+The entry points specified in the interface of ``lib/System`` must be aimed at
+completing some reasonably high level task needed by LLVM. We do not want to
+simply wrap each operating system call. It would be preferable to wrap several
+operating system calls that are always used in conjunction with one another by
+LLVM.
+
+For example, consider what is needed to execute a program, wait for it to
+complete, and return its result code. On Unix, this involves the following
+operating system calls: ``getenv``, ``fork``, ``execve``, and ``wait``. The
+correct thing for ``lib/System`` to provide is a function, say
+``ExecuteProgramAndWait``, that implements the functionality completely. What
+we don't want is wrappers for the operating system calls involved.
+
+There must **not** be a one-to-one relationship between operating system
+calls and the System library's interface. Any such interface function will be
+suspicious.
+
+No Unused Functionality
+-----------------------
+
+There must be no functionality specified in the interface of ``lib/System``
+that isn't actually used by LLVM. We're not writing a general purpose operating
+system wrapper here, just enough to satisfy LLVM's needs. And, LLVM doesn't
+need much. This design goal aims to keep the ``lib/System`` interface small and
+understandable which should foster its actual use and adoption.
+
+No Duplicate Implementations
+----------------------------
+
+The implementation of a function for a given platform must be written exactly
+once. This implies that it must be possible to apply a function's
+implementation to multiple operating systems if those operating systems can
+share the same implementation. This rule applies to the set of operating
+systems supported for a given class of operating system (e.g. Unix, Win32).
+
+No Virtual Methods
+------------------
+
+The System Library interfaces can be called quite frequently by LLVM. In order
+to make those calls as efficient as possible, we discourage the use of virtual
+methods. There is no need to use inheritance for implementation differences, it
+just adds complexity. The ``#include`` mechanism works just fine.
+
+No Exposed Functions
+--------------------
+
+Any functions defined by system libraries (i.e. not defined by ``lib/System``)
+must not be exposed through the ``lib/System`` interface, even if the header
+file for that function is not exposed. This prevents inadvertent use of system
+specific functionality.
+
+For example, the ``stat`` system call is notorious for having variations in the
+data it provides. ``lib/System`` must not declare ``stat`` nor allow it to be
+declared. Instead it should provide its own interface to discovering
+information about files and directories. Those interfaces may be implemented in
+terms of ``stat`` but that is strictly an implementation detail. The interface
+provided by the System Library must be implemented on all platforms (even those
+without ``stat``).
+
+No Exposed Data
+---------------
+
+Any data defined by system libraries (i.e. not defined by ``lib/System``) must
+not be exposed through the ``lib/System`` interface, even if the header file
+for that data is not exposed. As with functions, this prevents inadvertent
+use of data that might not exist on all platforms.
+
+Minimize Soft Errors
+--------------------
+
+Operating system interfaces will generally provide error results for every
+little thing that could go wrong. In almost all cases, you can divide these
+error results into two groups: normal/good/soft and abnormal/bad/hard. That is,
+some of the errors are simply information like "file not found", "insufficient
+privileges", etc. while other errors are much harder like "out of space", "bad
+disk sector", or "system call interrupted". We'll call the first group "*soft*"
+errors and the second group "*hard*" errors.
+
+``lib/System`` must always attempt to minimize soft errors. This is a design
+requirement because the minimization of soft errors can affect the granularity
+and the nature of the interface. In general, if you find that you're wanting to
+throw soft errors, you must review the granularity of the interface because it
+is likely you're trying to implement something that is too low level. The rule
+of thumb is to provide interface functions that **can't** fail, except when
+faced with hard errors.
+
+For a trivial example, suppose we wanted to add an "``OpenFileForWriting``"
+function. For many operating systems, if the file doesn't exist, attempting to
+open the file will produce an error. However, ``lib/System`` should not simply
+throw that error if it occurs because it's a soft error. The problem is that the
+interface function, ``OpenFileForWriting`` is too low level. It should be
+``OpenOrCreateFileForWriting``. In the case of the soft "doesn't exist" error,
+this function would just create it and then open it for writing.
+
+This design principle needs to be maintained in ``lib/System`` because it
+avoids the propagation of soft error handling throughout the rest of LLVM.
+Hard errors will generally just cause a termination for an LLVM tool so don't
+be bashful about throwing them.
+
+Rules of thumb:
+
+#. Don't throw soft errors, only hard errors.
+
+#. If you're tempted to throw a soft error, re-think the interface.
+
+#. Handle internally the most common normal/good/soft error conditions
+ so the rest of LLVM doesn't have to.
+
+No throw Specifications
+-----------------------
+
+None of the ``lib/System`` interface functions may be declared with C++
+``throw()`` specifications on them. This requirement makes sure that the
+compiler does not insert additional exception handling code into the interface
+functions. This is a performance consideration: ``lib/System`` functions are at
+the bottom of many call chains and as such can be frequently called. We need
+them to be as efficient as possible. However, no routines in the system
+library should actually throw exceptions.
+
+Code Organization
+-----------------
+
+Implementations of the System Library interface are separated by their general
+class of operating system. Currently only Unix and Win32 classes are defined
+but more could be added for other operating system classifications. To
+distinguish which implementation to compile, the code in ``lib/System`` uses
+the ``LLVM_ON_UNIX`` and ``LLVM_ON_WIN32`` ``#defines`` provided via configure
+through the ``llvm/Config/config.h`` file. Each source file in ``lib/System``,
+after implementing the generic (operating system independent) functionality
+needs to include the correct implementation using a set of
+``#if defined(LLVM_ON_XYZ)`` directives. For example, if we had
+``lib/System/File.cpp``, we'd expect to see in that file:
+
+.. code-block:: c++
+
+ #if defined(LLVM_ON_UNIX)
+ #include "Unix/File.cpp"
+ #endif
+ #if defined(LLVM_ON_WIN32)
+ #include "Win32/File.cpp"
+ #endif
+
+The implementation in ``lib/System/Unix/File.cpp`` should handle all Unix
+variants. The implementation in ``lib/System/Win32/File.cpp`` should handle all
+Win32 variants. What this does is quickly differentiate the basic class of
+operating system that will provide the implementation. The specific details for
+a given platform must still be determined through the use of ``#ifdef``.
+
+Consistent Semantics
+--------------------
+
+The implementation of a ``lib/System`` interface can vary drastically between
+platforms. That's okay as long as the end result of the interface function is
+the same. For example, a function to create a directory is pretty straight
+forward on all operating systems. System V IPC on the other hand isn't even
+supported on all platforms. Instead of "supporting" System V IPC,
+``lib/System`` should provide an interface to the basic concept of
+inter-process communications. The implementations might use System V IPC if
+that was available or named pipes, or whatever gets the job done effectively
+for a given operating system. In all cases, the interface and the
+implementation must be semantically consistent.
+
diff --git a/docs/TableGenFundamentals.rst b/docs/TableGenFundamentals.rst
index bfb2618998..356b7d208e 100644
--- a/docs/TableGenFundamentals.rst
+++ b/docs/TableGenFundamentals.rst
@@ -120,16 +120,16 @@ this (at the time of this writing):
}
...
-This definition corresponds to a 32-bit register-register add instruction in the
-X86. The string after the '``def``' string indicates the name of the
-record---"``ADD32rr``" in this case---and the comment at the end of the line
-indicates the superclasses of the definition. The body of the record contains
-all of the data that TableGen assembled for the record, indicating that the
-instruction is part of the "X86" namespace, the pattern indicating how the the
-instruction should be emitted into the assembly file, that it is a two-address
-instruction, has a particular encoding, etc. The contents and semantics of the
-information in the record is specific to the needs of the X86 backend, and is
-only shown as an example.
+This definition corresponds to the 32-bit register-register ``add`` instruction
+of the x86 architecture. ``def ADD32rr`` defines a record named
+``ADD32rr``, and the comment at the end of the line indicates the superclasses
+of the definition. The body of the record contains all of the data that
+TableGen assembled for the record, indicating that the instruction is part of
+the "X86" namespace, the pattern indicating how the instruction should be
+emitted into the assembly file, that it is a two-address instruction, has a
+particular encoding, etc. The contents and semantics of the information in the
+record are specific to the needs of the X86 backend, and are only shown as an
+example.
As you can see, a lot of information is needed for every instruction supported
by the code generator, and specifying it all manually would be unmaintainable,
@@ -152,13 +152,12 @@ factor out the common features that instructions of its class share. A key
feature of TableGen is that it allows the end-user to define the abstractions
they prefer to use when describing their information.
-Each def record has a special entry called "``NAME``." This is the name of the
-def ("``ADD32rr``" above). In the general case def names can be formed from
-various kinds of string processing expressions and ``NAME`` resolves to the
+Each ``def`` record has a special entry called "NAME". This is the name of the
+record ("``ADD32rr``" above). In the general case ``def`` names can be formed
+from various kinds of string processing expressions and ``NAME`` resolves to the
final value obtained after resolving all of those expressions. The user may
-refer to ``NAME`` anywhere she desires to use the ultimate name of the def.
-``NAME`` should not be defined anywhere else in user code to avoid conflict
-problems.
+refer to ``NAME`` anywhere she desires to use the ultimate name of the ``def``.
+``NAME`` should not be defined anywhere else in user code to avoid conflicts.
Running TableGen
----------------
diff --git a/docs/TestSuiteMakefileGuide.html b/docs/TestSuiteMakefileGuide.html
deleted file mode 100644
index 1b24250380..0000000000
--- a/docs/TestSuiteMakefileGuide.html
+++ /dev/null
@@ -1,351 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>LLVM test-suite Makefile Guide</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- LLVM test-suite Makefile Guide
-</h1>
-
-<ol>
- <li><a href="#overview">Overview</a></li>
- <li><a href="#testsuitestructure">Test suite structure</a></li>
- <li><a href="#testsuiterun">Running the test suite</a>
- <ul>
- <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
- <li><a href="#testsuitetests">Running different tests</a></li>
- <li><a href="#testsuiteoutput">Generating test output</a></li>
- <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
- </ul>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="overview">Overview</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>This document describes the features of the Makefile-based LLVM
-test-suite. This way of interacting with the test-suite is deprecated in favor
-of running the test-suite using LNT, but may continue to prove useful for some
-users. See the Testing
-Guide's <a href="TestingGuide.html#testsuitequickstart">test-suite
-Quickstart</a> section for more information.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuitestructure">Test suite Structure</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>The <tt>test-suite</tt> module contains a number of programs that can be compiled
-with LLVM and executed. These programs are compiled using the native compiler
-and various LLVM backends. The output from the program compiled with the
-native compiler is assumed correct; the results from the other programs are
-compared to the native program output and pass if they match.</p>
-
-<p>When executing tests, it is usually a good idea to start out with a subset of
-the available tests or programs. This makes test run times smaller at first and
-later on this is useful to investigate individual test failures. To run some
-test only on a subset of programs, simply change directory to the programs you
-want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
-test using the <tt>TEST</tt> variable to change what tests or run on the
-selected programs (see below for more info).</p>
-
-<p>In addition for testing correctness, the <tt>test-suite</tt> directory also
-performs timing tests of various LLVM optimizations. It also records
-compilation times for the compilers and the JIT. This information can be
-used to compare the effectiveness of LLVM's optimizations and code
-generation.</p>
-
-<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
-SingleSource, and External.</p>
-
-<ul>
-<li><tt>test-suite/SingleSource</tt>
-<p>The SingleSource directory contains test programs that are only a single
-source file in size. These are usually small benchmark programs or small
-programs that calculate a particular value. Several such programs are grouped
-together in each directory.</p></li>
-
-<li><tt>test-suite/MultiSource</tt>
-<p>The MultiSource directory contains subdirectories which contain entire
-programs with multiple source files. Large benchmarks and whole applications
-go here.</p></li>
-
-<li><tt>test-suite/External</tt>
-<p>The External directory contains Makefiles for building code that is external
-to (i.e., not distributed with) LLVM. The most prominent members of this
-directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
-directory does not contain these actual tests, but only the Makefiles that know
-how to properly compile these programs from somewhere else. The presence and
-location of these external programs is configured by the test-suite
-<tt>configure</tt> script.</p></li>
-</ul>
-
-<p>Each tree is then subdivided into several categories, including applications,
-benchmarks, regression tests, code that is strange grammatically, etc. These
-organizations should be relatively self explanatory.</p>
-
-<p>Some tests are known to fail. Some are bugs that we have not fixed yet;
-others are features that we haven't added yet (or may never add). In the
-regression tests, the result for such tests will be XFAIL (eXpected FAILure).
-In this way, you can tell the difference between an expected and unexpected
-failure.</p>
-
-<p>The tests in the test suite have no such feature at this time. If the
-test passes, only warnings and other miscellaneous output will be generated. If
-a test fails, a large &lt;program&gt; FAILED message will be displayed. This
-will help you separate benign warnings from actual test failures.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuiterun">Running the test suite</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>First, all tests are executed within the LLVM object directory tree. They
-<i>are not</i> executed inside of the LLVM source tree. This is because the
-test suite creates temporary files during execution.</p>
-
-<p>To run the test suite, you need to use the following steps:</p>
-
-<ol>
- <li><tt>cd</tt> into the <tt>llvm/projects</tt> directory in your source tree.
- </li>
-
- <li><p>Check out the <tt>test-suite</tt> module with:</p>
-
-<div class="doc_code">
-<pre>
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
-</pre>
-</div>
- <p>This will get the test suite into <tt>llvm/projects/test-suite</tt>.</p>
- </li>
- <li><p>Configure and build <tt>llvm</tt>.</p></li>
- <li><p>Configure and build <tt>llvm-gcc</tt>.</p></li>
- <li><p>Install <tt>llvm-gcc</tt> somewhere.</p></li>
- <li><p><em>Re-configure</em> <tt>llvm</tt> from the top level of
- each build tree (LLVM object directory tree) in which you want
- to run the test suite, just as you do before building LLVM.</p>
- <p>During the <em>re-configuration</em>, you must either: (1)
- have <tt>llvm-gcc</tt> you just built in your path, or (2)
- specify the directory where your just-built <tt>llvm-gcc</tt> is
- installed using <tt>--with-llvmgccdir=$LLVM_GCC_DIR</tt>.</p>
- <p>You must also tell the configure machinery that the test suite
- is available so it can be configured for your build tree:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
-</pre>
-</div>
- <p>[Remember that <tt>$LLVM_GCC_DIR</tt> is the directory where you
- <em>installed</em> llvm-gcc, not its src or obj directory.]</p>
- </li>
-
- <li><p>You can now run the test suite from your build tree as follows:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT/projects/test-suite
-% make
-</pre>
-</div>
- </li>
-</ol>
-<p>Note that the second and third steps only need to be done once. After you
-have the suite checked out and configured, you don't need to do it again (unless
-the test code or configure script changes).</p>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuiteexternal">Configuring External Tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-<p>In order to run the External tests in the <tt>test-suite</tt>
- module, you must specify <i>--with-externals</i>. This
- must be done during the <em>re-configuration</em> step (see above),
- and the <tt>llvm</tt> re-configuration must recognize the
- previously-built <tt>llvm-gcc</tt>. If any of these is missing or
- neglected, the External tests won't work.</p>
-<dl>
-<dt><i>--with-externals</i></dt>
-<dt><i>--with-externals=&lt;<tt>directory</tt>&gt;</i></dt>
-</dl>
- This tells LLVM where to find any external tests. They are expected to be
- in specifically named subdirectories of &lt;<tt>directory</tt>&gt;.
- If <tt>directory</tt> is left unspecified,
- <tt>configure</tt> uses the default value
- <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
- Subdirectory names known to LLVM include:
- <dl>
- <dt>spec95</dt>
- <dt>speccpu2000</dt>
- <dt>speccpu2006</dt>
- <dt>povray31</dt>
- </dl>
- Others are added from time to time, and can be determined from
- <tt>configure</tt>.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuitetests">Running different tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
-<p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
-module also provides a mechanism for compiling the programs in different ways.
-If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
-include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
-This Makefile can modify build rules to yield different results.</p>
-
-<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
-create the nightly test reports. To run the nightly tests, run <tt>gmake
-TEST=nightly</tt>.</p>
-
-<p>There are several TEST Makefiles available in the tree. Some of them are
-designed for internal LLVM research and will not work outside of the LLVM
-research group. They may still be valuable, however, as a guide to writing your
-own TEST Makefile for any optimization or analysis passes that you develop with
-LLVM.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuiteoutput">Generating test output</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>There are a number of ways to run the tests and generate output. The most
- simple one is simply running <tt>gmake</tt> with no arguments. This will
- compile and run all programs in the tree using a number of different methods
- and compare results. Any failures are reported in the output, but are likely
- drowned in the other output. Passes are not reported explicitly.</p>
-
- <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs
- the specified test and usually adds per-program summaries to the output
- (depending on which sometest you use). For example, the <tt>nightly</tt> test
- explicitly outputs TEST-PASS or TEST-FAIL for every test after each program.
- Though these lines are still drowned in the output, it's easy to grep the
- output logs in the Output directories.</p>
-
- <p>Even better are the <tt>report</tt> and <tt>report.format</tt> targets
- (where <tt>format</tt> is one of <tt>html</tt>, <tt>csv</tt>, <tt>text</tt> or
- <tt>graphs</tt>). The exact contents of the report are dependent on which
- <tt>TEST</tt> you are running, but the text results are always shown at the
- end of the run and the results are always stored in the
- <tt>report.&lt;type&gt;.format</tt> file (when running with
- <tt>TEST=&lt;type&gt;</tt>).
-
- The <tt>report</tt> also generate a file called
- <tt>report.&lt;type&gt;.raw.out</tt> containing the output of the entire test
- run.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuitecustom">Writing custom tests for the test suite</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>Assuming you can run the test suite, (e.g. "<tt>gmake TEST=nightly report</tt>"
-should work), it is really easy to run optimizations or code generator
-components against every program in the tree, collecting statistics or running
-custom checks for correctness. At base, this is how the nightly tester works,
-it's just one example of a general framework.</p>
-
-<p>Lets say that you have an LLVM optimization pass, and you want to see how
-many times it triggers. First thing you should do is add an LLVM
-<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
-will tally counts of things you care about.</p>
-
-<p>Following this, you can set up a test and a report that collects these and
-formats them for easy viewing. This consists of two files, a
-"<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
-test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
-format the output into a table. There are many example reports of various
-levels of sophistication included with the test suite, and the framework is very
-general.</p>
-
-<p>If you are interested in testing an optimization pass, check out the
-"libcalls" test as an example. It can be run like this:<p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
-% make TEST=libcalls report
-</pre>
-</div>
-
-<p>This will do a bunch of stuff, then eventually print a table like this:</p>
-
-<div class="doc_code">
-<pre>
-Name | total | #exit |
-...
-FreeBench/analyzer/analyzer | 51 | 6 |
-FreeBench/fourinarow/fourinarow | 1 | 1 |
-FreeBench/neural/neural | 19 | 9 |
-FreeBench/pifft/pifft | 5 | 3 |
-MallocBench/cfrac/cfrac | 1 | * |
-MallocBench/espresso/espresso | 52 | 12 |
-MallocBench/gs/gs | 4 | * |
-Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
-Prolangs-C/agrep/agrep | 33 | 12 |
-Prolangs-C/allroots/allroots | * | * |
-Prolangs-C/assembler/assembler | 47 | * |
-Prolangs-C/bison/mybison | 74 | * |
-...
-</pre>
-</div>
-
-<p>This basically is grepping the -stats output and displaying it in a table.
-You can also use the "TEST=libcalls report.html" target to get the table in HTML
-form, similarly for report.csv and report.tex.</p>
-
-<p>The source for this is in test-suite/TEST.libcalls.*. The format is pretty
-simple: the Makefile indicates how to run the test (in this case,
-"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
-each column of the output. The first value is the header for the column and the
-second is the regex to grep the output of the command for. There are lots of
-example reports that can do fancy stuff.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/TestSuiteMakefileGuide.rst b/docs/TestSuiteMakefileGuide.rst
new file mode 100644
index 0000000000..b10379ef4d
--- /dev/null
+++ b/docs/TestSuiteMakefileGuide.rst
@@ -0,0 +1,279 @@
+==============================
+LLVM test-suite Makefile Guide
+==============================
+
+Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya
+Lattner
+
+.. contents::
+ :local:
+
+Overview
+========
+
+This document describes the features of the Makefile-based LLVM
+test-suite. This way of interacting with the test-suite is deprecated in
+favor of running the test-suite using LNT, but may continue to prove
+useful for some users. See the Testing Guide's :ref:`test-suite Quickstart
+<test-suite-quickstart>` section for more information.
+
+Test suite Structure
+====================
+
+The ``test-suite`` module contains a number of programs that can be
+compiled with LLVM and executed. These programs are compiled using the
+native compiler and various LLVM backends. The output from the program
+compiled with the native compiler is assumed correct; the results from
+the other programs are compared to the native program output and pass if
+they match.
+
+When executing tests, it is usually a good idea to start out with a
+subset of the available tests or programs. This makes test run times
+smaller at first and later on this is useful to investigate individual
+test failures. To run some test only on a subset of programs, simply
+change directory to the programs you want tested and run ``gmake``
+there. Alternatively, you can run a different test using the ``TEST``
+variable to change what tests are run on the selected programs (see below
+for more info).
+
+In addition to testing correctness, the ``test-suite`` directory also
+performs timing tests of various LLVM optimizations. It also records
+compilation times for the compilers and the JIT. This information can be
+used to compare the effectiveness of LLVM's optimizations and code
+generation.
+
+``test-suite`` tests are divided into three types of tests: MultiSource,
+SingleSource, and External.
+
+- ``test-suite/SingleSource``
+
+ The SingleSource directory contains test programs that are only a
+ single source file in size. These are usually small benchmark
+ programs or small programs that calculate a particular value. Several
+ such programs are grouped together in each directory.
+
+- ``test-suite/MultiSource``
+
+ The MultiSource directory contains subdirectories which contain
+ entire programs with multiple source files. Large benchmarks and
+ whole applications go here.
+
+- ``test-suite/External``
+
+ The External directory contains Makefiles for building code that is
+ external to (i.e., not distributed with) LLVM. The most prominent
+ members of this directory are the SPEC 95 and SPEC 2000 benchmark
+ suites. The ``External`` directory does not contain these actual
+ tests, but only the Makefiles that know how to properly compile these
+ programs from somewhere else. The presence and location of these
+ external programs is configured by the test-suite ``configure``
+ script.
+
+Each tree is then subdivided into several categories, including
+applications, benchmarks, regression tests, code that is strange
+grammatically, etc. These organizations should be relatively
+self-explanatory.
+
+Some tests are known to fail. Some are bugs that we have not fixed yet;
+others are features that we haven't added yet (or may never add). In the
+regression tests, the result for such tests will be XFAIL (eXpected
+FAILure). In this way, you can tell the difference between an expected
+and unexpected failure.
+
+The tests in the test suite have no such feature at this time. If the
+test passes, only warnings and other miscellaneous output will be
+generated. If a test fails, a large <program> FAILED message will be
+displayed. This will help you separate benign warnings from actual test
+failures.
+
+Running the test suite
+======================
+
+First, all tests are executed within the LLVM object directory tree.
+They *are not* executed inside of the LLVM source tree. This is because
+the test suite creates temporary files during execution.
+
+To run the test suite, you need to use the following steps:
+
+#. ``cd`` into the ``llvm/projects`` directory in your source tree.
+#. Check out the ``test-suite`` module with:
+
+ .. code-block:: bash
+
+ % svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+
+ This will get the test suite into ``llvm/projects/test-suite``.
+
+#. Configure and build ``llvm``.
+
+#. Configure and build ``llvm-gcc``.
+
+#. Install ``llvm-gcc`` somewhere.
+
+#. *Re-configure* ``llvm`` from the top level of each build tree (LLVM
+ object directory tree) in which you want to run the test suite, just
+ as you do before building LLVM.
+
+ During the *re-configuration*, you must either: (1) have ``llvm-gcc``
+ you just built in your path, or (2) specify the directory where your
+ just-built ``llvm-gcc`` is installed using
+ ``--with-llvmgccdir=$LLVM_GCC_DIR``.
+
+ You must also tell the configure machinery that the test suite is
+ available so it can be configured for your build tree:
+
+ .. code-block:: bash
+
+ % cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
+
+ [Remember that ``$LLVM_GCC_DIR`` is the directory where you
+ *installed* llvm-gcc, not its src or obj directory.]
+
+#. You can now run the test suite from your build tree as follows:
+
+ .. code-block:: bash
+
+ % cd $LLVM_OBJ_ROOT/projects/test-suite
+ % make
+
+Note that the second and third steps only need to be done once. After
+you have the suite checked out and configured, you don't need to do it
+again (unless the test code or configure script changes).
+
+Configuring External Tests
+--------------------------
+
+In order to run the External tests in the ``test-suite`` module, you
+must specify *--with-externals*. This must be done during the
+*re-configuration* step (see above), and the ``llvm`` re-configuration
+must recognize the previously-built ``llvm-gcc``. If any of these is
+missing or neglected, the External tests won't work.
+
+* *--with-externals*
+
+* *--with-externals=<directory>*
+
+This tells LLVM where to find any external tests. They are expected to
+be in specifically named subdirectories of <``directory``>. If
+``directory`` is left unspecified, ``configure`` uses the default value
+``/home/vadve/shared/benchmarks/speccpu2000/benchspec``. Subdirectory
+names known to LLVM include:
+
+* spec95
+
+* speccpu2000
+
+* speccpu2006
+
+* povray31
+
+Others are added from time to time, and can be determined from
+``configure``.
+
+Running different tests
+-----------------------
+
+In addition to the regular "whole program" tests, the ``test-suite``
+module also provides a mechanism for compiling the programs in different
+ways. If the variable TEST is defined on the ``gmake`` command line, the
+test system will include a Makefile named
+``TEST.<value of TEST variable>.Makefile``. This Makefile can modify
+build rules to yield different results.
+
+For example, the LLVM nightly tester uses ``TEST.nightly.Makefile`` to
+create the nightly test reports. To run the nightly tests, run
+``gmake TEST=nightly``.
+
+There are several TEST Makefiles available in the tree. Some of them are
+designed for internal LLVM research and will not work outside of the
+LLVM research group. They may still be valuable, however, as a guide to
+writing your own TEST Makefile for any optimization or analysis passes
+that you develop with LLVM.
+
+Generating test output
+----------------------
+
+There are a number of ways to run the tests and generate output. The
+simplest one is running ``gmake`` with no arguments. This will
+compile and run all programs in the tree using a number of different
+methods and compare results. Any failures are reported in the output,
+but are likely drowned in the other output. Passes are not reported
+explicitly.
+
+Somewhat better is running ``gmake TEST=sometest test``, which runs the
+specified test and usually adds per-program summaries to the output
+(depending on which sometest you use). For example, the ``nightly`` test
+explicitly outputs TEST-PASS or TEST-FAIL for every test after each
+program. Though these lines are still drowned in the output, it's easy
+to grep the output logs in the Output directories.
+
+Even better are the ``report`` and ``report.format`` targets (where
+``format`` is one of ``html``, ``csv``, ``text`` or ``graphs``). The
+exact contents of the report are dependent on which ``TEST`` you are
+running, but the text results are always shown at the end of the run and
+the results are always stored in the ``report.<type>.format`` file (when
+running with ``TEST=<type>``). The ``report`` target also generates a file
+called ``report.<type>.raw.out`` containing the output of the entire
+test run.
+
+Writing custom tests for the test suite
+---------------------------------------
+
+Assuming you can run the test suite, (e.g.
+"``gmake TEST=nightly report``" should work), it is really easy to run
+optimizations or code generator components against every program in the
+tree, collecting statistics or running custom checks for correctness. At
+base, this is how the nightly tester works; it's just one example of a
+general framework.
+
+Let's say that you have an LLVM optimization pass, and you want to see
+how many times it triggers. First thing you should do is add an LLVM
+`statistic <ProgrammersManual.html#Statistic>`_ to your pass, which will
+tally counts of things you care about.
+
+Following this, you can set up a test and a report that collects these
+and formats them for easy viewing. This consists of two files, a
+"``test-suite/TEST.XXX.Makefile``" fragment (where XXX is the name of
+your test) and a "``test-suite/TEST.XXX.report``" file that indicates
+how to format the output into a table. There are many example reports of
+various levels of sophistication included with the test suite, and the
+framework is very general.
+
+If you are interested in testing an optimization pass, check out the
+"libcalls" test as an example. It can be run like this:
+
+.. code-block:: bash
+
+ % cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
+ % make TEST=libcalls report
+
+This will do a bunch of stuff, then eventually print a table like this:
+
+::
+
+ Name | total | #exit |
+ ...
+ FreeBench/analyzer/analyzer | 51 | 6 |
+ FreeBench/fourinarow/fourinarow | 1 | 1 |
+ FreeBench/neural/neural | 19 | 9 |
+ FreeBench/pifft/pifft | 5 | 3 |
+ MallocBench/cfrac/cfrac | 1 | * |
+ MallocBench/espresso/espresso | 52 | 12 |
+ MallocBench/gs/gs | 4 | * |
+ Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
+ Prolangs-C/agrep/agrep | 33 | 12 |
+ Prolangs-C/allroots/allroots | * | * |
+ Prolangs-C/assembler/assembler | 47 | * |
+ Prolangs-C/bison/mybison | 74 | * |
+ ...
+
+This basically is grepping the -stats output and displaying it in a
+table. You can also use the "TEST=libcalls report.html" target to get
+the table in HTML form, similarly for report.csv and report.tex.
+
+The source for this is in ``test-suite/TEST.libcalls.*``. The format is
+pretty simple: the Makefile indicates how to run the test (in this case,
+"``opt -simplify-libcalls -stats``"), and the report contains one line
+for each column of the output. The first value is the header for the
+column and the second is the regex to grep the output of the command
+for. There are lots of example reports that can do fancy stuff.
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
deleted file mode 100644
index d90c8ad1c3..0000000000
--- a/docs/TestingGuide.html
+++ /dev/null
@@ -1,916 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>LLVM Testing Infrastructure Guide</title>
- <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
- LLVM Testing Infrastructure Guide
-</h1>
-
-<ol>
- <li><a href="#overview">Overview</a></li>
- <li><a href="#requirements">Requirements</a></li>
- <li><a href="#org">LLVM testing infrastructure organization</a>
- <ul>
- <li><a href="#regressiontests">Regression tests</a></li>
- <li><a href="#testsuite"><tt>test-suite</tt></a></li>
- <li><a href="#debuginfotests">Debugging Information tests</a></li>
- </ul>
- </li>
- <li><a href="#quick">Quick start</a>
- <ul>
- <li><a href="#quickregressiontests">Regression tests</a></li>
- <li><a href="#quickdebuginfotests">Debugging Information tests</a></li>
- </ul>
- </li>
- <li><a href="#rtstructure">Regression test structure</a>
- <ul>
- <li><a href="#rtcustom">Writing new regression tests</a></li>
- <li><a href="#FileCheck">The FileCheck utility</a></li>
- <li><a href="#rtvars">Variables and substitutions</a></li>
- <li><a href="#rtfeatures">Other features</a></li>
- </ul>
- </li>
- <li><a href="#testsuiteoverview"><tt>test-suite</tt> Overview</a>
- <ul>
- <li><a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a></li>
- <li><a href="#testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></li>
- </ul>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="overview">Overview</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>This document is the reference manual for the LLVM testing infrastructure. It
-documents the structure of the LLVM testing infrastructure, the tools needed to
-use it, and how to add and run tests.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="requirements">Requirements</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>In order to use the LLVM testing infrastructure, you will need all of the
-software required to build LLVM, as well
-as <a href="http://python.org">Python</a> 2.4 or later.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="org">LLVM testing infrastructure organization</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>The LLVM testing infrastructure contains two major categories of tests:
-regression tests and whole programs. The regression tests are contained inside
-the LLVM repository itself under <tt>llvm/test</tt> and are expected to always
-pass -- they should be run before every commit.</p>
-
-<p>The whole programs tests are referred to as the "LLVM test suite" (or
-"test-suite") and are in the <tt>test-suite</tt> module in subversion. For
-historical reasons, these tests are also referred to as the "nightly tests" in
-places, which is less ambiguous than "test-suite" and remains in use although we
-run them much more often than nightly.</p>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="regressiontests">Regression tests</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>The regression tests are small pieces of code that test a specific feature of
-LLVM or trigger a specific bug in LLVM. They are usually written in LLVM
-assembly language, but can be written in other languages if the test targets a
-particular language front end (and the appropriate <tt>--with-llvmgcc</tt>
-options were used at <tt>configure</tt> time of the <tt>llvm</tt> module). These
-tests are driven by the 'lit' testing tool, which is part of LLVM.</p>
-
-<p>These code fragments are not complete programs. The code generated
-from them is never executed to determine correct behavior.</p>
-
-<p>These code fragment tests are located in the <tt>llvm/test</tt>
-directory.</p>
-
-<p>Typically when a bug is found in LLVM, a regression test containing
-just enough code to reproduce the problem should be written and placed
-somewhere underneath this directory. In most cases, this will be a small
-piece of LLVM assembly language code, often distilled from an actual
-application or benchmark.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="testsuite"><tt>test-suite</tt></a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>The test suite contains whole programs, which are pieces of code which can be
-compiled and linked into a stand-alone program that can be executed. These
-programs are generally written in high level languages such as C or C++.</p>
-
-<p>These programs are compiled using a user specified compiler and set of flags,
-and then executed to capture the program output and timing information. The
-output of these programs is compared to a reference output to ensure that the
-program is being compiled correctly.</p>
-
-<p>In addition to compiling and executing programs, whole program tests serve as
-a way of benchmarking LLVM performance, both in terms of the efficiency of the
-programs generated as well as the speed with which LLVM compiles, optimizes, and
-generates code.</p>
-
-<p>The test-suite is located in the <tt>test-suite</tt> Subversion module.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="debuginfotests">Debugging Information tests</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>The test suite contains tests to check quality of debugging information.
-The tests are written in C-based languages or in LLVM assembly language. </p>
-
-<p>These tests are compiled and run under a debugger. The debugger output
-is checked to validate the debugging information. See README.txt in the
-test suite for more information. This test suite is located in the
-<tt>debuginfo-tests</tt> Subversion module. </p>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="quick">Quick start</a></h2>
-<!--=========================================================================-->
-
-<div>
-
- <p>The tests are located in two separate Subversion modules. The regressions
- tests are in the main "llvm" module under the directory
- <tt>llvm/test</tt> (so you get these tests for free with the main llvm
- tree). Use "make check-all" to run the regression tests after building
- LLVM.</p>
-
- <p>The more comprehensive test suite that includes whole programs in C and C++
- is in the <tt>test-suite</tt>
- module. See <a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a>
- for more information on running these tests.</p>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="quickregressiontests">Regression tests</a></h3>
-<div>
-<!-- _______________________________________________________________________ -->
-<p>To run all of the LLVM regression tests, use the master Makefile in
- the <tt>llvm/test</tt> directory:</p>
-
-<div class="doc_code">
-<pre>
-% gmake -C llvm/test
-</pre>
-</div>
-
-<p>or</p>
-
-<div class="doc_code">
-<pre>
-% gmake check
-</pre>
-</div>
-
-<p>If you have <a href="http://clang.llvm.org/">Clang</a> checked out and built,
-you can run the LLVM and Clang tests simultaneously using:</p>
-
-<p>or</p>
-
-<div class="doc_code">
-<pre>
-% gmake check-all
-</pre>
-</div>
-
-<p>To run the tests with Valgrind (Memcheck by default), just append
-<tt>VG=1</tt> to the commands above, e.g.:</p>
-
-<div class="doc_code">
-<pre>
-% gmake check VG=1
-</pre>
-</div>
-
-<p>To run individual tests or subsets of tests, you can use the 'llvm-lit'
-script which is built as part of LLVM. For example, to run the
-'Integer/BitPacked.ll' test by itself you can run:</p>
-
-<div class="doc_code">
-<pre>
-% llvm-lit ~/llvm/test/Integer/BitPacked.ll
-</pre>
-</div>
-
-<p>or to run all of the ARM CodeGen tests:</p>
-
-<div class="doc_code">
-<pre>
-% llvm-lit ~/llvm/test/CodeGen/ARM
-</pre>
-</div>
-
-<p>For more information on using the 'lit' tool, see 'llvm-lit --help' or the
-'lit' man page.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="quickdebuginfotests">Debugging Information tests</a></h3>
-<div>
-<!-- _______________________________________________________________________ -->
-<div>
-
-<p> To run debugging information tests simply checkout the tests inside
-clang/test directory. </p>
-
-<div class="doc_code">
-<pre>
-%cd clang/test
-% svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
-</pre>
-</div>
-
-<p> These tests are already set up to run as part of clang regression tests.</p>
-
-</div>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="rtstructure">Regression test structure</a></h2>
-<!--=========================================================================-->
-<div>
- <p>The LLVM regression tests are driven by 'lit' and are located in
- the <tt>llvm/test</tt> directory.
-
- <p>This directory contains a large array of small tests
- that exercise various features of LLVM and ensure that regressions do not
- occur. The directory is broken into several sub-directories, each focused on
- a particular area of LLVM. A few of the important ones are:</p>
-
- <ul>
- <li><tt>Analysis</tt>: checks Analysis passes.</li>
- <li><tt>Archive</tt>: checks the Archive library.</li>
- <li><tt>Assembler</tt>: checks Assembly reader/writer functionality.</li>
- <li><tt>Bitcode</tt>: checks Bitcode reader/writer functionality.</li>
- <li><tt>CodeGen</tt>: checks code generation and each target.</li>
- <li><tt>Features</tt>: checks various features of the LLVM language.</li>
- <li><tt>Linker</tt>: tests bitcode linking.</li>
- <li><tt>Transforms</tt>: tests each of the scalar, IPO, and utility
- transforms to ensure they make the right transformations.</li>
- <li><tt>Verifier</tt>: tests the IR verifier.</li>
- </ul>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="rtcustom">Writing new regression tests</a></h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>The regression test structure is very simple, but does require some
- information to be set. This information is gathered via <tt>configure</tt> and
- is written to a file, <tt>lit.site.cfg</tt>
- in <tt>llvm/test</tt>. The <tt>llvm/test</tt> Makefile does this work for
- you.</p>
-
- <p>In order for the regression tests to work, each directory of tests must
- have a <tt>lit.local.cfg</tt> file. Lit looks for this file to determine how
- to run the tests. This file is just Python code and thus is very flexible,
- but we've standardized it for the LLVM regression tests. If you're adding a
- directory of tests, just copy <tt>lit.local.cfg</tt> from another directory to
- get running. The standard <tt>lit.local.cfg</tt> simply specifies which files
- to look in for tests. Any directory that contains only directories does not
- need the <tt>lit.local.cfg</tt> file. Read the
- <a href="http://llvm.org/cmds/lit.html">Lit documentation</a> for more
- information. </p>
-
- <p>The <tt>llvm-runtests</tt> function looks at each file that is passed to
- it and gathers any lines together that match "RUN:". These are the "RUN" lines
- that specify how the test is to be run. So, each test script must contain
- RUN lines if it is to do anything. If there are no RUN lines, the
- <tt>llvm-runtests</tt> function will issue an error and the test will
- fail.</p>
-
- <p>RUN lines are specified in the comments of the test program using the
- keyword <tt>RUN</tt> followed by a colon, and lastly the command (pipeline)
- to execute. Together, these lines form the "script" that
- <tt>llvm-runtests</tt> executes to run the test case. The syntax of the
- RUN lines is similar to a shell's syntax for pipelines including I/O
- redirection and variable substitution. However, even though these lines
- may <i>look</i> like a shell script, they are not. RUN lines are interpreted
- directly by the Tcl <tt>exec</tt> command. They are never executed by a
- shell. Consequently the syntax differs from normal shell script syntax in a
- few ways. You can specify as many RUN lines as needed.</p>
-
- <p>lit performs substitution on each RUN line to replace LLVM tool
- names with the full paths to the executable built for each tool (in
- $(LLVM_OBJ_ROOT)/$(BuildMode)/bin). This ensures that lit does not
- invoke any stray LLVM tools in the user's path during testing.</p>
-
- <p>Each RUN line is executed on its own, distinct from other lines unless
- its last character is <tt>\</tt>. This continuation character causes the RUN
- line to be concatenated with the next one. In this way you can build up long
- pipelines of commands without making huge line lengths. The lines ending in
- <tt>\</tt> are concatenated until a RUN line that doesn't end in <tt>\</tt> is
- found. This concatenated set of RUN lines then constitutes one execution.
- Tcl will substitute variables and arrange for the pipeline to be executed. If
- any process in the pipeline fails, the entire line (and test case) fails too.
- </p>
-
- <p> Below is an example of legal RUN lines in a <tt>.ll</tt> file:</p>
-
-<div class="doc_code">
-<pre>
-; RUN: llvm-as &lt; %s | llvm-dis &gt; %t1
-; RUN: llvm-dis &lt; %s.bc-13 &gt; %t2
-; RUN: diff %t1 %t2
-</pre>
-</div>
-
- <p>As with a Unix shell, the RUN: lines permit pipelines and I/O redirection
- to be used. However, the usage is slightly different than for Bash. To check
- what's legal, see the documentation for the
- <a href="http://www.tcl.tk/man/tcl8.5/TclCmd/exec.htm#M2">Tcl exec</a>
- command and the
- <a href="http://www.tcl.tk/man/tcl8.5/tutorial/Tcl26.html">tutorial</a>.
- The major differences are:</p>
- <ul>
- <li>You can't do <tt>2&gt;&amp;1</tt>. That will cause Tcl to write to a
- file named <tt>&amp;1</tt>. Usually this is done to get stderr to go through
- a pipe. You can do that in tcl with <tt>|&amp;</tt> so replace this idiom:
- <tt>... 2&gt;&amp;1 | grep</tt> with <tt>... |&amp; grep</tt></li>
- <li>You can only redirect to a file, not to another descriptor and not from
- a here document.</li>
- <li>tcl supports redirecting to open files with the @ syntax but you
- shouldn't use that here.</li>
- </ul>
-
- <p>There are some quoting rules that you must pay attention to when writing
- your RUN lines. In general nothing needs to be quoted. Tcl won't strip off any
- quote characters so they will get passed to the invoked program. For
- example:</p>
-
-<div class="doc_code">
-<pre>
-... | grep 'find this string'
-</pre>
-</div>
-
- <p>This will fail because the ' characters are passed to grep. This would
- instruct grep to look for <tt>'find</tt> in the files <tt>this</tt> and
- <tt>string'</tt>. To avoid this use curly braces to tell Tcl that it should
- treat everything enclosed as one value. So our example would become:</p>
-
-<div class="doc_code">
-<pre>
-... | grep {find this string}
-</pre>
-</div>
-
- <p>Additionally, the characters <tt>[</tt> and <tt>]</tt> are treated
- specially by Tcl. They tell Tcl to interpret the content as a command to
- execute. Since these characters are often used in regular expressions this can
- have disastrous results and cause the entire test run in a directory to fail.
- For example, a common idiom is to look for some basicblock number:</p>
-
-<div class="doc_code">
-<pre>
-... | grep bb[2-8]
-</pre>
-</div>
-
- <p>This, however, will cause Tcl to fail because it's going to try to execute
- a program named "2-8". Instead, what you want is this:</p>
-
-<div class="doc_code">
-<pre>
-... | grep {bb\[2-8\]}
-</pre>
-</div>
-
- <p>Finally, if you need to pass the <tt>\</tt> character down to a program,
- then it must be doubled. This is another Tcl special character. So, suppose
- you had:
-
-<div class="doc_code">
-<pre>
-... | grep 'i32\*'
-</pre>
-</div>
-
- <p>This will fail to match what you want (a pointer to i32). First, the
- <tt>'</tt> do not get stripped off. Second, the <tt>\</tt> gets stripped off
- by Tcl so what grep sees is: <tt>'i32*'</tt>. That's not likely to match
- anything. To resolve this you must use <tt>\\</tt> and the <tt>{}</tt>, like
- this:</p>
-
-<div class="doc_code">
-<pre>
-... | grep {i32\\*}
-</pre>
-</div>
-
-<p>If your system includes GNU <tt>grep</tt>, make sure
-that <tt>GREP_OPTIONS</tt> is not set in your environment. Otherwise,
-you may get invalid results (both false positives and false
-negatives).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="FileCheck">The FileCheck utility</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>A powerful feature of the RUN: lines is that it allows any arbitrary commands
- to be executed as part of the test harness. While standard (portable) unix
- tools like 'grep' work fine on run lines, as you see above, there are a lot
- of caveats due to interaction with Tcl syntax, and we want to make sure the
- run lines are portable to a wide range of systems. Another major problem is
- that grep is not very good at checking to verify that the output of a tool
- contains a series of different output in a specific order. The FileCheck
- tool was designed to help with these problems.</p>
-
-<p>FileCheck (whose basic command line arguments are described in <a
- href="http://llvm.org/cmds/FileCheck.html">the FileCheck man page</a>) is
- designed to read a file to check from standard input, and the set of things
- to verify from a file specified as a command line argument. A simple example
- of using FileCheck from a RUN line looks like this:</p>
-
-<div class="doc_code">
-<pre>
-; RUN: llvm-as &lt; %s | llc -march=x86-64 | <b>FileCheck %s</b>
-</pre>
-</div>
-
-<p>This syntax says to pipe the current file ("%s") into llvm-as, pipe that into
-llc, then pipe the output of llc into FileCheck. This means that FileCheck will
-be verifying its standard input (the llc output) against the filename argument
-specified (the original .ll file specified by "%s"). To see how this works,
-let's look at the rest of the .ll file (after the RUN line):</p>
-
-<div class="doc_code">
-<pre>
-define void @sub1(i32* %p, i32 %v) {
-entry:
-; <b>CHECK: sub1:</b>
-; <b>CHECK: subl</b>
- %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
- ret void
-}
-
-define void @inc4(i64* %p) {
-entry:
-; <b>CHECK: inc4:</b>
-; <b>CHECK: incq</b>
- %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
- ret void
-}
-</pre>
-</div>
-
-<p>Here you can see some "CHECK:" lines specified in comments. Now you can see
-how the file is piped into llvm-as, then llc, and the machine code output is
-what we are verifying. FileCheck checks the machine code output to verify that
-it matches what the "CHECK:" lines specify.</p>
-
-<p>The syntax of the CHECK: lines is very simple: they are fixed strings that
-must occur in order. FileCheck defaults to ignoring horizontal whitespace
-differences (e.g. a space is allowed to match a tab) but otherwise, the contents
-of the CHECK: line is required to match something in the test file exactly.</p>
-
-<p>One nice thing about FileCheck (compared to grep) is that it allows merging
-test cases together into logical groups. For example, because the test above
-is checking for the "sub1:" and "inc4:" labels, it will not match unless there
-is a "subl" in between those labels. If it existed somewhere else in the file,
-that would not count: "grep subl" matches if subl exists anywhere in the
-file.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-check-prefix">The FileCheck -check-prefix option</a>
-</h4>
-
-<div>
-
-<p>The FileCheck -check-prefix option allows multiple test configurations to be
-driven from one .ll file. This is useful in many circumstances, for example,
-testing different architectural variants with llc. Here's a simple example:</p>
-
-<div class="doc_code">
-<pre>
-; RUN: llvm-as &lt; %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
-; RUN: | <b>FileCheck %s -check-prefix=X32</b>
-; RUN: llvm-as &lt; %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
-; RUN: | <b>FileCheck %s -check-prefix=X64</b>
-
-define &lt;4 x i32&gt; @pinsrd_1(i32 %s, &lt;4 x i32&gt; %tmp) nounwind {
- %tmp1 = insertelement &lt;4 x i32&gt; %tmp, i32 %s, i32 1
- ret &lt;4 x i32&gt; %tmp1
-; <b>X32:</b> pinsrd_1:
-; <b>X32:</b> pinsrd $1, 4(%esp), %xmm0
-
-; <b>X64:</b> pinsrd_1:
-; <b>X64:</b> pinsrd $1, %edi, %xmm0
-}
-</pre>
-</div>
-
-<p>In this case, we're testing that we get the expected code generation with
-both 32-bit and 64-bit code generation.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-CHECK-NEXT">The "CHECK-NEXT:" directive</a>
-</h4>
-
-<div>
-
-<p>Sometimes you want to match lines and would like to verify that matches
-happen on exactly consecutive lines with no other lines in between them. In
-this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If
-you specified a custom check prefix, just use "&lt;PREFIX&gt;-NEXT:". For
-example, something like this works as you'd expect:</p>
-
-<div class="doc_code">
-<pre>
-define void @t2(&lt;2 x double&gt;* %r, &lt;2 x double&gt;* %A, double %B) {
- %tmp3 = load &lt;2 x double&gt;* %A, align 16
- %tmp7 = insertelement &lt;2 x double&gt; undef, double %B, i32 0
- %tmp9 = shufflevector &lt;2 x double&gt; %tmp3,
- &lt;2 x double&gt; %tmp7,
- &lt;2 x i32&gt; &lt; i32 0, i32 2 &gt;
- store &lt;2 x double&gt; %tmp9, &lt;2 x double&gt;* %r, align 16
- ret void
-
-; <b>CHECK:</b> t2:
-; <b>CHECK:</b> movl 8(%esp), %eax
-; <b>CHECK-NEXT:</b> movapd (%eax), %xmm0
-; <b>CHECK-NEXT:</b> movhpd 12(%esp), %xmm0
-; <b>CHECK-NEXT:</b> movl 4(%esp), %eax
-; <b>CHECK-NEXT:</b> movapd %xmm0, (%eax)
-; <b>CHECK-NEXT:</b> ret
-}
-</pre>
-</div>
-
-<p>CHECK-NEXT: directives reject the input unless there is exactly one newline
-between it an the previous directive. A CHECK-NEXT cannot be the first
-directive in a file.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-CHECK-NOT">The "CHECK-NOT:" directive</a>
-</h4>
-
-<div>
-
-<p>The CHECK-NOT: directive is used to verify that a string doesn't occur
-between two matches (or the first match and the beginning of the file). For
-example, to verify that a load is removed by a transformation, a test like this
-can be used:</p>
-
-<div class="doc_code">
-<pre>
-define i8 @coerce_offset0(i32 %V, i32* %P) {
- store i32 %V, i32* %P
-
- %P2 = bitcast i32* %P to i8*
- %P3 = getelementptr i8* %P2, i32 2
-
- %A = load i8* %P3
- ret i8 %A
-; <b>CHECK:</b> @coerce_offset0
-; <b>CHECK-NOT:</b> load
-; <b>CHECK:</b> ret i8
-}
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a>
-</h4>
-
-<div>
-
-<!-- {% raw %} -->
-
-<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
-uses of FileCheck, fixed string matching is perfectly sufficient. For some
-things, a more flexible form of matching is desired. To support this, FileCheck
-allows you to specify regular expressions in matching strings, surrounded by
-double braces: <b>{{yourregex}}</b>. Because we want to use fixed string
-matching for a majority of what we do, FileCheck has been designed to support
-mixing and matching fixed string matching with regular expressions. This allows
-you to write things like this:</p>
-
-<div class="doc_code">
-<pre>
-; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
-</pre>
-</div>
-
-<p>In this case, any offset from the ESP register will be allowed, and any xmm
-register will be allowed.</p>
-
-<p>Because regular expressions are enclosed with double braces, they are
-visually distinct, and you don't need to use escape characters within the double
-braces like you would in C. In the rare case that you want to match double
-braces explicitly from the input, you can use something ugly like
-<b>{{[{][{]}}</b> as your pattern.</p>
-
-<!-- {% endraw %} -->
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="FileCheck-Variables">FileCheck Variables</a>
-</h4>
-
-<div>
-
-
-<!-- {% raw %} -->
-
-<p>It is often useful to match a pattern and then verify that it occurs again
-later in the file. For codegen tests, this can be useful to allow any register,
-but verify that that register is used consistently later. To do this, FileCheck
-allows named variables to be defined and substituted into patterns. Here is a
-simple example:</p>
-
-<div class="doc_code">
-<pre>
-; CHECK: test5:
-; CHECK: notw <b>[[REGISTER:%[a-z]+]]</b>
-; CHECK: andw {{.*}}<b>[[REGISTER]]</b>
-</pre>
-</div>
-
-<p>The first check line matches a regex (<tt>%[a-z]+</tt>) and captures it into
-the variables "REGISTER". The second line verifies that whatever is in REGISTER
-occurs later in the file after an "andw". FileCheck variable references are
-always contained in <tt>[[ ]]</tt> pairs, are named, and their names can be
-formed with the regex "<tt>[a-zA-Z][a-zA-Z0-9]*</tt>". If a colon follows the
-name, then it is a definition of the variable, if not, it is a use.</p>
-
-<p>FileCheck variables can be defined multiple times, and uses always get the
-latest value. Note that variables are all read at the start of a "CHECK" line
-and are all defined at the end. This means that if you have something like
-"<tt>CHECK: [[XYZ:.*]]x[[XYZ]]</tt>" that the check line will read the previous
-value of the XYZ variable and define a new one after the match is performed. If
-you need to do something like this you can probably take advantage of the fact
-that FileCheck is not actually line-oriented when it matches, this allows you to
-define two separate CHECK lines that match on the same line.
-</p>
-
-<!-- {% endraw %} -->
-
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="rtvars">Variables and substitutions</a></h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>With a RUN line there are a number of substitutions that are permitted. In
- general, any Tcl variable that is available in the <tt>substitute</tt>
- function (in <tt>test/lib/llvm.exp</tt>) can be substituted into a RUN line.
- To make a substitution just write the variable's name preceded by a $.
- Additionally, for compatibility reasons with previous versions of the test
- library, certain names can be accessed with an alternate syntax: a % prefix.
- These alternates are deprecated and may go away in a future version.
- </p>
- <p>Here are the available variable names. The alternate syntax is listed in
- parentheses.</p>
-
- <dl style="margin-left: 25px">
- <dt><b>$test</b> (%s)</dt>
- <dd>The full path to the test case's source. This is suitable for passing
- on the command line as the input to an llvm tool.</dd>
-
- <dt><b>$srcdir</b></dt>
- <dd>The source directory from where the "<tt>make check</tt>" was run.</dd>
-
- <dt><b>objdir</b></dt>
- <dd>The object directory that corresponds to the <tt>$srcdir</tt>.</dd>
-
- <dt><b>subdir</b></dt>
- <dd>A partial path from the <tt>test</tt> directory that contains the
- sub-directory that contains the test source being executed.</dd>
-
- <dt><b>srcroot</b></dt>
- <dd>The root directory of the LLVM src tree.</dd>
-
- <dt><b>objroot</b></dt>
- <dd>The root directory of the LLVM object tree. This could be the same
- as the srcroot.</dd>
-
- <dt><b>path</b><dt>
- <dd>The path to the directory that contains the test case source. This is
- for locating any supporting files that are not generated by the test, but
- used by the test.</dd>
-
- <dt><b>tmp</b></dt>
- <dd>The path to a temporary file name that could be used for this test case.
- The file name won't conflict with other test cases. You can append to it if
- you need multiple temporaries. This is useful as the destination of some
- redirected output.</dd>
-
- <dt><b>target_triplet</b> (%target_triplet)</dt>
- <dd>The target triplet that corresponds to the current host machine (the one
- running the test cases). This should probably be called "host".<dd>
-
- <dt><b>link</b> (%link)</dt>
- <dd>This full link command used to link LLVM executables. This has all the
- configured -I, -L and -l options.</dd>
-
- <dt><b>shlibext</b> (%shlibext)</dt>
- <dd>The suffix for the host platforms share library (dll) files. This
- includes the period as the first character.</dd>
- </dl>
- <p>To add more variables, two things need to be changed. First, add a line in
- the <tt>test/Makefile</tt> that creates the <tt>site.exp</tt> file. This will
- "set" the variable as a global in the site.exp file. Second, in the
- <tt>test/lib/llvm.exp</tt> file, in the substitute proc, add the variable name
- to the list of "global" declarations at the beginning of the proc. That's it,
- the variable can then be used in test scripts.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3><a name="rtfeatures">Other Features</a></h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>To make RUN line writing easier, there are several shell scripts located
- in the <tt>llvm/test/Scripts</tt> directory. This directory is in the PATH
- when running tests, so you can just call these scripts using their name. For
- example:</p>
- <dl>
- <dt><b>ignore</b></dt>
- <dd>This script runs its arguments and then always returns 0. This is useful
- in cases where the test needs to cause a tool to generate an error (e.g. to
- check the error output). However, any program in a pipeline that returns a
- non-zero result will cause the test to fail. This script overcomes that
- issue and nicely documents that the test case is purposefully ignoring the
- result code of the tool</dd>
-
- <dt><b>not</b></dt>
- <dd>This script runs its arguments and then inverts the result code from
- it. Zero result codes become 1. Non-zero result codes become 0. This is
- useful to invert the result of a grep. For example "not grep X" means
- succeed only if you don't find X in the input.</dd>
- </dl>
-
- <p>Sometimes it is necessary to mark a test case as "expected fail" or XFAIL.
- You can easily mark a test as XFAIL just by including <tt>XFAIL: </tt> on a
- line near the top of the file. This signals that the test case should succeed
- if the test fails. Such test cases are counted separately by the testing
- tool. To specify an expected fail, use the XFAIL keyword in the comments of
- the test program followed by a colon and one or more failure patterns. Each
- failure pattern can be either '*' (to specify fail everywhere), or a part of a
- target triple (indicating the test should fail on that platform), or the name
- of a configurable feature (for example, "loadable_module"). If there is a
- match, the test is expected to fail. If not, the test is expected to
- succeed. To XFAIL everywhere just specify <tt>XFAIL: *</tt>. Here is an
- example of an <tt>XFAIL</tt> line:</p>
-
-<div class="doc_code">
-<pre>
-; XFAIL: darwin,sun
-</pre>
-</div>
-
- <p>To make the output more useful, the <tt>llvm_runtest</tt> function wil
- scan the lines of the test case for ones that contain a pattern that matches
- PR[0-9]+. This is the syntax for specifying a PR (Problem Report) number that
- is related to the test case. The number after "PR" specifies the LLVM bugzilla
- number. When a PR number is specified, it will be used in the pass/fail
- reporting. This is useful to quickly get some context when a test fails.</p>
-
- <p>Finally, any line that contains "END." will cause the special
- interpretation of lines to terminate. This is generally done right after the
- last RUN: line. This has two side effects: (a) it prevents special
- interpretation of lines that are part of the test program, not the
- instructions to the test case, and (b) it speeds things up for really big test
- cases by avoiding interpretation of the remainder of the file.</p>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuiteoverview"><tt>test-suite</tt> Overview</a></h2>
-<!--=========================================================================-->
-
-<div>
-
-<p>The <tt>test-suite</tt> module contains a number of programs that can be
-compiled and executed. The <tt>test-suite</tt> includes reference outputs for
-all of the programs, so that the output of the executed program can be checked
-for correctness.</p>
-
-<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
-SingleSource, and External.</p>
-
-<ul>
-<li><tt>test-suite/SingleSource</tt>
-<p>The SingleSource directory contains test programs that are only a single
-source file in size. These are usually small benchmark programs or small
-programs that calculate a particular value. Several such programs are grouped
-together in each directory.</p></li>
-
-<li><tt>test-suite/MultiSource</tt>
-<p>The MultiSource directory contains subdirectories which contain entire
-programs with multiple source files. Large benchmarks and whole applications
-go here.</p></li>
-
-<li><tt>test-suite/External</tt>
-<p>The External directory contains Makefiles for building code that is external
-to (i.e., not distributed with) LLVM. The most prominent members of this
-directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
-directory does not contain these actual tests, but only the Makefiles that know
-how to properly compile these programs from somewhere else. When
-using <tt>LNT</tt>, use the <tt>--test-externals</tt> option to include these
-tests in the results.</p></li>
-</ul>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuitequickstart"><tt>test-suite</tt> Quickstart</a></h2>
-<!--=========================================================================-->
-
-<div>
-<p>The modern way of running the <tt>test-suite</tt> is focused on testing and
-benchmarking complete compilers using
-the <a href="http://llvm.org/docs/lnt">LNT</a> testing infrastructure.</p>
-
-<p>For more information on using LNT to execute the <tt>test-suite</tt>, please
-see the <a href="http://llvm.org/docs/lnt/quickstart.html">LNT Quickstart</a>
-documentation.</p>
-</div>
-
-<!--=========================================================================-->
-<h2><a name="testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></h2>
-<!--=========================================================================-->
-
-<div>
-<p>Historically, the <tt>test-suite</tt> was executed using a complicated setup
-of Makefiles. The LNT based approach above is recommended for most users, but
-there are some testing scenarios which are not supported by the LNT approach. In
-addition, LNT currently uses the Makefile setup under the covers and so
-developers who are interested in how LNT works under the hood may want to
-understand the Makefile based setup.</p>
-
-<p>For more information on the <tt>test-suite</tt> Makefile setup, please see
-the <a href="TestSuiteMakefileGuide.html">Test Suite Makefile Guide.</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
- <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst
new file mode 100644
index 0000000000..329003f089
--- /dev/null
+++ b/docs/TestingGuide.rst
@@ -0,0 +1,529 @@
+=================================
+LLVM Testing Infrastructure Guide
+=================================
+
+Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya
+Lattner
+
+.. contents::
+ :local:
+
+.. toctree::
+ :hidden:
+
+ TestSuiteMakefileGuide
+
+Overview
+========
+
+This document is the reference manual for the LLVM testing
+infrastructure. It documents the structure of the LLVM testing
+infrastructure, the tools needed to use it, and how to add and run
+tests.
+
+Requirements
+============
+
+In order to use the LLVM testing infrastructure, you will need all of
+the software required to build LLVM, as well as
+`Python <http://python.org>`_ 2.4 or later.
+
+LLVM testing infrastructure organization
+========================================
+
+The LLVM testing infrastructure contains two major categories of tests:
+regression tests and whole programs. The regression tests are contained
+inside the LLVM repository itself under ``llvm/test`` and are expected
+to always pass -- they should be run before every commit.
+
+The whole programs tests are referred to as the "LLVM test suite" (or
+"test-suite") and are in the ``test-suite`` module in subversion. For
+historical reasons, these tests are also referred to as the "nightly
+tests" in places, which is less ambiguous than "test-suite" and remains
+in use although we run them much more often than nightly.
+
+Regression tests
+----------------
+
+The regression tests are small pieces of code that test a specific
+feature of LLVM or trigger a specific bug in LLVM. They are usually
+written in LLVM assembly language, but can be written in other languages
+if the test targets a particular language front end (and the appropriate
+``--with-llvmgcc`` options were used at ``configure`` time of the
+``llvm`` module). These tests are driven by the 'lit' testing tool,
+which is part of LLVM.
+
+These code fragments are not complete programs. The code generated from
+them is never executed to determine correct behavior.
+
+These code fragment tests are located in the ``llvm/test`` directory.
+
+Typically when a bug is found in LLVM, a regression test containing just
+enough code to reproduce the problem should be written and placed
+somewhere underneath this directory. In most cases, this will be a small
+piece of LLVM assembly language code, often distilled from an actual
+application or benchmark.
+
+``test-suite``
+--------------
+
+The test suite contains whole programs, which are pieces of code which
+can be compiled and linked into a stand-alone program that can be
+executed. These programs are generally written in high level languages
+such as C or C++.
+
+These programs are compiled using a user specified compiler and set of
+flags, and then executed to capture the program output and timing
+information. The output of these programs is compared to a reference
+output to ensure that the program is being compiled correctly.
+
+In addition to compiling and executing programs, whole program tests
+serve as a way of benchmarking LLVM performance, both in terms of the
+efficiency of the programs generated as well as the speed with which
+LLVM compiles, optimizes, and generates code.
+
+The test-suite is located in the ``test-suite`` Subversion module.
+
+Debugging Information tests
+---------------------------
+
+The test suite contains tests to check quality of debugging information.
+The tests are written in C-based languages or in LLVM assembly language.
+
+These tests are compiled and run under a debugger. The debugger output
+is checked to validate the debugging information. See README.txt in the
+test suite for more information. This test suite is located in the
+``debuginfo-tests`` Subversion module.
+
+Quick start
+===========
+
+The tests are located in two separate Subversion modules. The
+regression tests are in the main "llvm" module under the directory
+``llvm/test`` (so you get these tests for free with the main llvm tree).
+Use "make check-all" to run the regression tests after building LLVM.
+
+The more comprehensive test suite that includes whole programs in C and C++
+is in the ``test-suite`` module. See :ref:`test-suite Quickstart
+<test-suite-quickstart>` for more information on running these tests.
+
+Regression tests
+----------------
+
+To run all of the LLVM regression tests, use the master Makefile in the
+``llvm/test`` directory:
+
+.. code-block:: bash
+
+ % gmake -C llvm/test
+
+or
+
+.. code-block:: bash
+
+ % gmake check
+
+If you have `Clang <http://clang.llvm.org/>`_ checked out and built, you
+can run the LLVM and Clang tests simultaneously using:
+
+
+
+.. code-block:: bash
+
+ % gmake check-all
+
+To run the tests with Valgrind (Memcheck by default), just append
+``VG=1`` to the commands above, e.g.:
+
+.. code-block:: bash
+
+ % gmake check VG=1
+
+To run individual tests or subsets of tests, you can use the 'llvm-lit'
+script which is built as part of LLVM. For example, to run the
+'Integer/BitPacked.ll' test by itself you can run:
+
+.. code-block:: bash
+
+ % llvm-lit ~/llvm/test/Integer/BitPacked.ll
+
+or to run all of the ARM CodeGen tests:
+
+.. code-block:: bash
+
+ % llvm-lit ~/llvm/test/CodeGen/ARM
+
+For more information on using the 'lit' tool, see 'llvm-lit --help' or
+the 'lit' man page.
+
+Debugging Information tests
+---------------------------
+
+To run debugging information tests simply check out the tests inside
+the clang/test directory.
+
+.. code-block:: bash
+
+ % cd clang/test
+ % svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
+
+These tests are already set up to run as part of clang regression tests.
+
+Regression test structure
+=========================
+
+The LLVM regression tests are driven by 'lit' and are located in the
+``llvm/test`` directory.
+
+This directory contains a large array of small tests that exercise
+various features of LLVM and ensure that regressions do not occur.
+The directory is broken into several sub-directories, each focused on a
+particular area of LLVM. A few of the important ones are:
+
+- ``Analysis``: checks Analysis passes.
+- ``Archive``: checks the Archive library.
+- ``Assembler``: checks Assembly reader/writer functionality.
+- ``Bitcode``: checks Bitcode reader/writer functionality.
+- ``CodeGen``: checks code generation and each target.
+- ``Features``: checks various features of the LLVM language.
+- ``Linker``: tests bitcode linking.
+- ``Transforms``: tests each of the scalar, IPO, and utility transforms
+ to ensure they make the right transformations.
+- ``Verifier``: tests the IR verifier.
+
+Writing new regression tests
+----------------------------
+
+The regression test structure is very simple, but does require some
+information to be set. This information is gathered via ``configure``
+and is written to a file, ``lit.site.cfg`` in ``llvm/test``. The
+``llvm/test`` Makefile does this work for you.
+
+In order for the regression tests to work, each directory of tests must
+have a ``lit.local.cfg`` file. Lit looks for this file to determine how
+to run the tests. This file is just Python code and thus is very
+flexible, but we've standardized it for the LLVM regression tests. If
+you're adding a directory of tests, just copy ``lit.local.cfg`` from
+another directory to get running. The standard ``lit.local.cfg`` simply
+specifies which files to look in for tests. Any directory that contains
+only directories does not need the ``lit.local.cfg`` file. Read the :doc:`Lit
+documentation <CommandGuide/lit>` for more information.
+
+The ``llvm-runtests`` function looks at each file that is passed to it
+and gathers any lines together that match "RUN:". These are the "RUN"
+lines that specify how the test is to be run. So, each test script must
+contain RUN lines if it is to do anything. If there are no RUN lines,
+the ``llvm-runtests`` function will issue an error and the test will
+fail.
+
+RUN lines are specified in the comments of the test program using the
+keyword ``RUN`` followed by a colon, and lastly the command (pipeline)
+to execute. Together, these lines form the "script" that
+``llvm-runtests`` executes to run the test case. The syntax of the RUN
+lines is similar to a shell's syntax for pipelines including I/O
+redirection and variable substitution. However, even though these lines
+may *look* like a shell script, they are not. RUN lines are interpreted
+directly by the Tcl ``exec`` command. They are never executed by a
+shell. Consequently the syntax differs from normal shell script syntax
+in a few ways. You can specify as many RUN lines as needed.
+
+lit performs substitution on each RUN line to replace LLVM tool names
+with the full paths to the executable built for each tool (in
+$(LLVM\_OBJ\_ROOT)/$(BuildMode)/bin). This ensures that lit does not
+invoke any stray LLVM tools in the user's path during testing.
+
+Each RUN line is executed on its own, distinct from other lines unless
+its last character is ``\``. This continuation character causes the RUN
+line to be concatenated with the next one. In this way you can build up
+long pipelines of commands without making huge line lengths. The lines
+ending in ``\`` are concatenated until a RUN line that doesn't end in
+``\`` is found. This concatenated set of RUN lines then constitutes one
+execution. Tcl will substitute variables and arrange for the pipeline to
+be executed. If any process in the pipeline fails, the entire line (and
+test case) fails too.
+
+Below is an example of legal RUN lines in a ``.ll`` file:
+
+.. code-block:: llvm
+
+ ; RUN: llvm-as < %s | llvm-dis > %t1
+ ; RUN: llvm-dis < %s.bc-13 > %t2
+ ; RUN: diff %t1 %t2
+
+As with a Unix shell, the RUN: lines permit pipelines and I/O
+redirection to be used. However, the usage is slightly different than
+for Bash. To check what's legal, see the documentation for the `Tcl
+exec <http://www.tcl.tk/man/tcl8.5/TclCmd/exec.htm#M2>`_ command and the
+`tutorial <http://www.tcl.tk/man/tcl8.5/tutorial/Tcl26.html>`_. The
+major differences are:
+
+- You can't do ``2>&1``. That will cause Tcl to write to a file named
+ ``&1``. Usually this is done to get stderr to go through a pipe. You
+ can do that in tcl with ``|&`` so replace this idiom:
+ ``... 2>&1 | grep`` with ``... |& grep``
+- You can only redirect to a file, not to another descriptor and not
+ from a here document.
+- tcl supports redirecting to open files with the @ syntax but you
+ shouldn't use that here.
+
+There are some quoting rules that you must pay attention to when writing
+your RUN lines. In general nothing needs to be quoted. Tcl won't strip
+off any quote characters so they will get passed to the invoked program.
+For example:
+
+.. code-block:: bash
+
+ ... | grep 'find this string'
+
+This will fail because the ' characters are passed to grep. This would
+instruct grep to look for ``'find`` in the files ``this`` and
+``string'``. To avoid this use curly braces to tell Tcl that it should
+treat everything enclosed as one value. So our example would become:
+
+.. code-block:: bash
+
+ ... | grep {find this string}
+
+Additionally, the characters ``[`` and ``]`` are treated specially by
+Tcl. They tell Tcl to interpret the content as a command to execute.
+Since these characters are often used in regular expressions this can
+have disastrous results and cause the entire test run in a directory to
+fail. For example, a common idiom is to look for some basicblock number:
+
+.. code-block:: bash
+
+ ... | grep bb[2-8]
+
+This, however, will cause Tcl to fail because it's going to try to
+execute a program named "2-8". Instead, what you want is this:
+
+.. code-block:: bash
+
+ ... | grep {bb\[2-8\]}
+
+Finally, if you need to pass the ``\`` character down to a program, then
+it must be doubled. This is another Tcl special character. So, suppose
+you had:
+
+.. code-block:: bash
+
+ ... | grep 'i32\*'
+
+This will fail to match what you want (a pointer to i32). First, the
+``'`` do not get stripped off. Second, the ``\`` gets stripped off by
+Tcl so what grep sees is: ``'i32*'``. That's not likely to match
+anything. To resolve this you must use ``\\`` and the ``{}``, like this:
+
+.. code-block:: bash
+
+ ... | grep {i32\\*}
+
+If your system includes GNU ``grep``, make sure that ``GREP_OPTIONS`` is
+not set in your environment. Otherwise, you may get invalid results
+(both false positives and false negatives).
+
+The FileCheck utility
+---------------------
+
+A powerful feature of the RUN: lines is that it allows any arbitrary
+commands to be executed as part of the test harness. While standard
+(portable) unix tools like 'grep' work fine on run lines, as you see
+above, there are a lot of caveats due to interaction with Tcl syntax,
+and we want to make sure the run lines are portable to a wide range of
+systems. Another major problem is that grep is not very good at checking
+to verify that the output of a tool contains a series of different
+output in a specific order. The FileCheck tool was designed to help with
+these problems.
+
+FileCheck is designed to read a file to check from standard input, and the set
+of things to verify from a file specified as a command line argument.
+FileCheck is described in :doc:`the FileCheck man page
+<CommandGuide/FileCheck>`.
+
+Variables and substitutions
+---------------------------
+
+With a RUN line there are a number of substitutions that are permitted.
+In general, any Tcl variable that is available in the ``substitute``
+function (in ``test/lib/llvm.exp``) can be substituted into a RUN line.
+To make a substitution just write the variable's name preceded by a $.
+Additionally, for compatibility reasons with previous versions of the
+test library, certain names can be accessed with an alternate syntax: a
+% prefix. These alternates are deprecated and may go away in a future
+version.
+
+Here are the available variable names. The alternate syntax is listed in
+parentheses.
+
+``$test`` (``%s``)
+ The full path to the test case's source. This is suitable for passing on
+ the command line as the input to an llvm tool.
+
+``%(line)``, ``%(line+<number>)``, ``%(line-<number>)``
+ The number of the line where this variable is used, with an optional
+ integer offset. This can be used in tests with multiple RUN: lines,
+ which reference test file's line numbers.
+
+``$srcdir``
+ The source directory from where the "``make check``" was run.
+
+``objdir``
+ The object directory that corresponds to the ``$srcdir``.
+
+``subdir``
+ A partial path from the ``test`` directory that contains the
+ sub-directory that contains the test source being executed.
+
+``srcroot``
+ The root directory of the LLVM src tree.
+
+``objroot``
+ The root directory of the LLVM object tree. This could be the same as
+ the srcroot.
+
+``path``
+ The path to the directory that contains the test case source. This is
+ for locating any supporting files that are not generated by the test,
+ but used by the test.
+
+``tmp``
+ The path to a temporary file name that could be used for this test case.
+ The file name won't conflict with other test cases. You can append to it
+ if you need multiple temporaries. This is useful as the destination of
+ some redirected output.
+
+``target_triplet`` (``%target_triplet``)
+ The target triplet that corresponds to the current host machine (the one
+ running the test cases). This should probably be called "host".
+
+``link`` (``%link``)
+ The full link command used to link LLVM executables. This has all the
+ configured -I, -L and -l options.
+
+``shlibext`` (``%shlibext``)
+ The suffix for the host platform's shared library (dll) files. This
+ includes the period as the first character.
+
+To add more variables, two things need to be changed. First, add a line
+in the ``test/Makefile`` that creates the ``site.exp`` file. This will
+"set" the variable as a global in the site.exp file. Second, in the
+``test/lib/llvm.exp`` file, in the substitute proc, add the variable
+name to the list of "global" declarations at the beginning of the proc.
+That's it, the variable can then be used in test scripts.
+
+Other Features
+--------------
+
+To make RUN line writing easier, there are several shell scripts located
+in the ``llvm/test/Scripts`` directory. This directory is in the PATH
+when running tests, so you can just call these scripts using their name.
+For example:
+
+``ignore``
+ This script runs its arguments and then always returns 0. This is useful
+ in cases where the test needs to cause a tool to generate an error (e.g.
+ to check the error output). However, any program in a pipeline that
+ returns a non-zero result will cause the test to fail. This script
+ overcomes that issue and nicely documents that the test case is
+ purposefully ignoring the result code of the tool.
+``not``
+ This script runs its arguments and then inverts the result code from it.
+ Zero result codes become 1. Non-zero result codes become 0. This is
+ useful to invert the result of a grep. For example "not grep X" means
+ succeed only if you don't find X in the input.
+
+Sometimes it is necessary to mark a test case as "expected fail" or
+XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:``
+on a line near the top of the file. This signals that the test case
+should succeed if the test fails. Such test cases are counted separately
+by the testing tool. To specify an expected fail, use the XFAIL keyword
+in the comments of the test program followed by a colon and one or more
+failure patterns. Each failure pattern can be either ``*`` (to specify
+fail everywhere), or a part of a target triple (indicating the test
+should fail on that platform), or the name of a configurable feature
+(for example, ``loadable_module``). If there is a match, the test is
+expected to fail. If not, the test is expected to succeed. To XFAIL
+everywhere just specify ``XFAIL: *``. Here is an example of an ``XFAIL``
+line:
+
+.. code-block:: llvm
+
+ ; XFAIL: darwin,sun
+
+To make the output more useful, the ``llvm_runtest`` function will scan
+the lines of the test case for ones that contain a pattern that matches
+``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number
+that is related to the test case. The number after "PR" specifies the
+LLVM bugzilla number. When a PR number is specified, it will be used in
+the pass/fail reporting. This is useful to quickly get some context when
+a test fails.
+
+Finally, any line that contains "END." will cause the special
+interpretation of lines to terminate. This is generally done right after
+the last RUN: line. This has two side effects:
+
+(a) it prevents special interpretation of lines that are part of the test
+ program, not the instructions to the test case, and
+
+(b) it speeds things up for really big test cases by avoiding
+ interpretation of the remainder of the file.
+
+``test-suite`` Overview
+=======================
+
+The ``test-suite`` module contains a number of programs that can be
+compiled and executed. The ``test-suite`` includes reference outputs for
+all of the programs, so that the output of the executed program can be
+checked for correctness.
+
+``test-suite`` tests are divided into three types of tests: MultiSource,
+SingleSource, and External.
+
+- ``test-suite/SingleSource``
+
+ The SingleSource directory contains test programs that are only a
+ single source file in size. These are usually small benchmark
+ programs or small programs that calculate a particular value. Several
+ such programs are grouped together in each directory.
+
+- ``test-suite/MultiSource``
+
+ The MultiSource directory contains subdirectories which contain
+ entire programs with multiple source files. Large benchmarks and
+ whole applications go here.
+
+- ``test-suite/External``
+
+ The External directory contains Makefiles for building code that is
+ external to (i.e., not distributed with) LLVM. The most prominent
+ members of this directory are the SPEC 95 and SPEC 2000 benchmark
+ suites. The ``External`` directory does not contain these actual
+ tests, but only the Makefiles that know how to properly compile these
+ programs from somewhere else. When using ``LNT``, use the
+ ``--test-externals`` option to include these tests in the results.
+
+.. _test-suite-quickstart:
+
+``test-suite`` Quickstart
+-------------------------
+
+The modern way of running the ``test-suite`` is focused on testing and
+benchmarking complete compilers using the
+`LNT <http://llvm.org/docs/lnt>`_ testing infrastructure.
+
+For more information on using LNT to execute the ``test-suite``, please
+see the `LNT Quickstart <http://llvm.org/docs/lnt/quickstart.html>`_
+documentation.
+
+``test-suite`` Makefiles
+------------------------
+
+Historically, the ``test-suite`` was executed using a complicated setup
+of Makefiles. The LNT based approach above is recommended for most
+users, but there are some testing scenarios which are not supported by
+the LNT approach. In addition, LNT currently uses the Makefile setup
+under the covers and so developers who are interested in how LNT works
+under the hood may want to understand the Makefile based setup.
+
+For more information on the ``test-suite`` Makefile setup, please see
+the :doc:`Test Suite Makefile Guide <TestSuiteMakefileGuide>`.
diff --git a/docs/conf.py b/docs/conf.py
index a1e9b5f6e2..919bb3bc9d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -47,9 +47,9 @@ copyright = u'2012, LLVM Project'
# built documents.
#
# The short X.Y version.
-version = '3.2'
+version = '3.3'
# The full version, including alpha/beta/rc tags.
-release = '3.2'
+release = '3.3'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/development_process.rst b/docs/development_process.rst
index 4fc20b3412..74324b98a6 100644
--- a/docs/development_process.rst
+++ b/docs/development_process.rst
@@ -8,6 +8,7 @@ Development Process Documentation
MakefileGuide
Projects
+ LLVMBuild
* :ref:`projects`
@@ -16,7 +17,7 @@ Development Process Documentation
tree) allow the project code to be located outside (or inside) the ``llvm/``
tree, while using LLVM header files and libraries.
-* `LLVMBuild Documentation <LLVMBuild.html>`_
+* :doc:`LLVMBuild`
Describes the LLVMBuild organization and files used by LLVM to specify
component descriptions.
diff --git a/docs/subsystems.rst b/docs/subsystems.rst
index 80d0eed663..35d7b8111d 100644
--- a/docs/subsystems.rst
+++ b/docs/subsystems.rst
@@ -18,6 +18,12 @@ Subsystem Documentation
DebuggingJITedCode
GoldPlugin
MarkedUpDisassembly
+ HowToUseInstrMappings
+ SystemLibrary
+ SourceLevelDebugging
+
+.. FIXME: once LangRef is Sphinxified, HowToUseInstrMappings should be put
+ under LangRef's toctree instead of this page's toctree.
* `Writing an LLVM Pass <WritingAnLLVMPass.html>`_
@@ -48,7 +54,7 @@ Subsystem Documentation
The interfaces source-language compilers should use for compiling GC'd
programs.
-* `Source Level Debugging with LLVM <SourceLevelDebugging.html>`_
+* :doc:`Source Level Debugging with LLVM <SourceLevelDebugging>`
This document describes the design and philosophy behind the LLVM
source-level debugger.
@@ -67,9 +73,9 @@ Subsystem Documentation
This describes the file format and encoding used for LLVM "bc" files.
-* `System Library <SystemLibrary.html>`_
+* :doc:`System Library <SystemLibrary>`
- This document describes the LLVM System Library (<tt>lib/System</tt>) and
+ This document describes the LLVM System Library (``lib/System``) and
how to keep LLVM source code portable
* :ref:`lto`
diff --git a/docs/userguides.rst b/docs/userguides.rst
index 8c1554dfce..8f184205c8 100644
--- a/docs/userguides.rst
+++ b/docs/userguides.rst
@@ -20,6 +20,7 @@ User Guides
HowToSubmitABug
SphinxQuickstartTemplate
Phabricator
+ TestingGuide
* :ref:`getting_started`
@@ -77,7 +78,7 @@ User Guides
A template + tutorial for writing new Sphinx documentation. It is meant
to be read in source form.
-* `LLVM Testing Infrastructure Guide <TestingGuide.html>`_
+* :doc:`LLVM Testing Infrastructure Guide <TestingGuide>`
A reference manual for using the LLVM testing infrastructure.
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 215cb4d371..2d35ca73e8 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -667,8 +667,6 @@ static _Unwind_Reason_Code handleLsda(int version,
const uint8_t *actionTableStart = callSiteTableEnd;
const uint8_t *callSitePtr = callSiteTableStart;
- bool foreignException = false;
-
while (callSitePtr < callSiteTableEnd) {
uintptr_t start = readEncodedPointer(&callSitePtr,
callSiteEncoding);
@@ -684,7 +682,6 @@ static _Unwind_Reason_Code handleLsda(int version,
// We have been notified of a foreign exception being thrown,
// and we therefore need to execute cleanup landing pads
actionEntry = 0;
- foreignException = true;
}
if (landingPad == 0) {
@@ -1687,7 +1684,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
std::vector<llvm::Constant*> structVals;
llvm::Constant *nextStruct;
- llvm::GlobalVariable *nextGlobal = NULL;
// Generate each type info
//
@@ -1702,7 +1698,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
typeInfoName = typeInfoNameBuilder.str();
// Note: Does not seem to work without allocation
- nextGlobal =
new llvm::GlobalVariable(module,
ourTypeInfoType,
true,
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 864870bfe7..dd2da66631 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -408,6 +408,13 @@ lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name);
extern void
lto_codegen_debug_options(lto_code_gen_t cg, const char *);
+/**
+ * Initializes LLVM disassemblers.
+ * FIXME: This doesn't really belong here.
+ */
+extern void
+lto_initialize_disassembler(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h
index 6aacca5a6f..42f8e553d4 100644
--- a/include/llvm/ADT/MapVector.h
+++ b/include/llvm/ADT/MapVector.h
@@ -83,6 +83,18 @@ public:
typename MapType::const_iterator Pos = Map.find(Key);
return Pos == Map.end()? 0 : 1;
}
+
+ iterator find(const KeyT &Key) {
+ typename MapType::const_iterator Pos = Map.find(Key);
+ return Pos == Map.end()? Vector.end() :
+ (Vector.begin() + Pos->second);
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapType::const_iterator Pos = Map.find(Key);
+ return Pos == Map.end()? Vector.end() :
+ (Vector.begin() + Pos->second);
+ }
};
}
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index aee500d4fb..dacda36521 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -246,10 +246,10 @@ inline int array_pod_sort_comparator(const void *P1, const void *P2) {
return 0;
}
-/// get_array_pad_sort_comparator - This is an internal helper function used to
+/// get_array_pod_sort_comparator - This is an internal helper function used to
/// get type deduction of T right.
template<typename T>
-inline int (*get_array_pad_sort_comparator(const T &))
+inline int (*get_array_pod_sort_comparator(const T &))
(const void*, const void*) {
return array_pod_sort_comparator<T>;
}
@@ -274,7 +274,7 @@ inline void array_pod_sort(IteratorTy Start, IteratorTy End) {
// Don't dereference start iterator of empty sequence.
if (Start == End) return;
qsort(&*Start, End-Start, sizeof(*Start),
- get_array_pad_sort_comparator(*Start));
+ get_array_pod_sort_comparator(*Start));
}
template<class IteratorTy>
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 6e0fd94dfe..e508f9df90 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -365,7 +365,7 @@ template <typename T>
class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
typedef SmallVectorTemplateBase<T, isPodLike<T>::value > SuperClass;
- SmallVectorImpl(const SmallVectorImpl&); // DISABLED.
+ SmallVectorImpl(const SmallVectorImpl&) LLVM_DELETED_FUNCTION;
public:
typedef typename SuperClass::iterator iterator;
typedef typename SuperClass::size_type size_type;
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 2ace8294a8..a9e67cafba 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -44,7 +44,6 @@ public:
UnknownArch,
arm, // ARM; arm, armv.*, xscale
- cellspu, // CellSPU: spu, cellspu
hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex
mipsel, // MIPSEL: mipsel, mipsallegrexel
@@ -66,7 +65,8 @@ public:
nvptx64, // NVPTX: 64-bit
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
amdil, // amdil: amd IL
- spir // SPIR: standard portable IR for OpenCL
+ spir, // SPIR: standard portable IR for OpenCL 32-bit version
+ spir64 // SPIR: standard portable IR for OpenCL 64-bit version
};
enum VendorType {
UnknownVendor,
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index b4327eeb0b..1983c00c57 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -18,6 +18,16 @@
// of memory references in a function, returning either NULL, for no dependence,
// or a more-or-less detailed description of the dependence between them.
//
+// This pass exists to support the DependenceGraph pass. There are two separate
+// passes because there's a useful separation of concerns. A dependence exists
+// if two conditions are met:
+//
+// 1) Two instructions reference the same memory location, and
+// 2) There is a flow of control leading from one instruction to the other.
+//
+// DependenceAnalysis attacks the first condition; DependenceGraph will attack
+// the second (it's not yet ready).
+//
// Please note that this is work in progress and the interface is subject to
// change.
//
@@ -53,8 +63,8 @@ namespace llvm {
/// input dependences are unordered.
class Dependence {
public:
- Dependence(const Instruction *Source,
- const Instruction *Destination) :
+ Dependence(Instruction *Source,
+ Instruction *Destination) :
Src(Source), Dst(Destination) {}
virtual ~Dependence() {}
@@ -82,11 +92,11 @@ namespace llvm {
/// getSrc - Returns the source instruction for this dependence.
///
- const Instruction *getSrc() const { return Src; }
+ Instruction *getSrc() const { return Src; }
/// getDst - Returns the destination instruction for this dependence.
///
- const Instruction *getDst() const { return Dst; }
+ Instruction *getDst() const { return Dst; }
/// isInput - Returns true if this is an input dependence.
///
@@ -158,7 +168,7 @@ namespace llvm {
///
void dump(raw_ostream &OS) const;
private:
- const Instruction *Src, *Dst;
+ Instruction *Src, *Dst;
friend class DependenceAnalysis;
};
@@ -173,8 +183,8 @@ namespace llvm {
/// input dependences are unordered.
class FullDependence : public Dependence {
public:
- FullDependence(const Instruction *Src,
- const Instruction *Dst,
+ FullDependence(Instruction *Src,
+ Instruction *Dst,
bool LoopIndependent,
unsigned Levels);
~FullDependence() {
@@ -243,8 +253,8 @@ namespace llvm {
/// The flag PossiblyLoopIndependent should be set by the caller
/// if it appears that control flow can reach from Src to Dst
/// without traversing a loop back edge.
- Dependence *depends(const Instruction *Src,
- const Instruction *Dst,
+ Dependence *depends(Instruction *Src,
+ Instruction *Dst,
bool PossiblyLoopIndependent);
/// getSplitIteration - Give a dependence that's splitable at some
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index a075db3342..82a3a566c9 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -120,15 +120,18 @@ namespace llvm {
/// bound the computation necessary to determine whether the cost is
/// sufficiently low to warrant inlining.
InlineCost getInlineCost(CallSite CS, int Threshold);
- /// getCalledFunction - The heuristic used to determine if we should inline
- /// the function call or not. The callee is explicitly specified, to allow
- /// you to calculate the cost of inlining a function via a pointer. This
- /// behaves exactly as the version with no explicit callee parameter in all
- /// other respects.
+
+ /// \brief Get an InlineCost with the callee explicitly specified.
+ /// This allows you to calculate the cost of inlining a function via a
+ /// pointer. This behaves exactly as the version with no explicit callee
+ /// parameter in all other respects.
//
// Note: This is used by out-of-tree passes, please do not remove without
// adding a replacement API.
InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
+
+ /// \brief Minimal filter to detect invalid constructs for inlining.
+ bool isInlineViable(Function &Callee);
};
}
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index e561e3742b..6db400c563 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -57,7 +57,7 @@ namespace llvm {
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
+ Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -69,7 +69,7 @@ namespace llvm {
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
+ Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -88,7 +88,7 @@ namespace llvm {
/// SimplifyShlInst - Given operands for a Shl, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -127,14 +127,14 @@ namespace llvm {
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -178,7 +178,7 @@ namespace llvm {
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const DataLayout *TD = 0,
+ const DataLayout *TD = 0,
const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 5ace200803..6c352de2a9 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -92,11 +92,6 @@ private:
Attributes(AttributesImpl *A) : Attrs(A) {}
public:
Attributes() : Attrs(0) {}
- Attributes(const Attributes &A) : Attrs(A.Attrs) {}
- Attributes &operator=(const Attributes &A) {
- Attrs = A.Attrs;
- return *this;
- }
/// get - Return a uniquified Attributes object. This takes the uniquified
/// value from the Builder and wraps it in the Attributes class.
@@ -199,7 +194,6 @@ public:
AttrBuilder() : Bits(0) {}
explicit AttrBuilder(uint64_t B) : Bits(B) {}
AttrBuilder(const Attributes &A) : Bits(A.Raw()) {}
- AttrBuilder(const AttrBuilder &B) : Bits(B.Bits) {}
void clear() { Bits = 0; }
@@ -318,21 +312,26 @@ public:
FunctionIndex = ~0U
};
private:
- /// AttrList - The attributes that we are managing. This can be null to
- /// represent the empty attributes list.
+ /// @brief The attributes that we are managing. This can be null to represent
+ /// the empty attributes list.
AttributeListImpl *AttrList;
+
+ /// @brief The attributes for the specified index are returned. Attributes
+ /// for the result are denoted with Idx = 0.
+ Attributes getAttributes(unsigned Idx) const;
+
+ explicit AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {}
public:
AttrListPtr() : AttrList(0) {}
- AttrListPtr(const AttrListPtr &P);
+ AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {}
const AttrListPtr &operator=(const AttrListPtr &RHS);
- ~AttrListPtr();
//===--------------------------------------------------------------------===//
// Attribute List Construction and Mutation
//===--------------------------------------------------------------------===//
/// get - Return a Attributes list with the specified parameters in it.
- static AttrListPtr get(ArrayRef<AttributeWithIndex> Attrs);
+ static AttrListPtr get(LLVMContext &C, ArrayRef<AttributeWithIndex> Attrs);
/// addAttr - Add the specified attribute at the specified index to this
/// attribute list. Since attribute lists are immutable, this
@@ -419,13 +418,6 @@ public:
const AttributeWithIndex &getSlot(unsigned Slot) const;
void dump() const;
-
-private:
- explicit AttrListPtr(AttributeListImpl *L);
-
- /// getAttributes - The attributes for the specified index are
- /// returned. Attributes for the result are denoted with Idx = 0.
- Attributes getAttributes(unsigned Idx) const;
};
} // End llvm namespace
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index 4fd4b5d90a..7b30c7e458 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -50,10 +50,10 @@ class ArchiveMember : public ilist_node<ArchiveMember> {
SVR4SymbolTableFlag = 1, ///< Member is a SVR4 symbol table
BSD4SymbolTableFlag = 2, ///< Member is a BSD4 symbol table
LLVMSymbolTableFlag = 4, ///< Member is an LLVM symbol table
- BitcodeFlag = 8, ///< Member is bitcode
- HasPathFlag = 16, ///< Member has a full or partial path
+ BitcodeFlag = 8, ///< Member is bitcode
+ HasPathFlag = 16, ///< Member has a full or partial path
HasLongFilenameFlag = 32, ///< Member uses the long filename syntax
- StringTableFlag = 64 ///< Member is an ar(1) format string table
+ StringTableFlag = 64 ///< Member is an ar(1) format string table
};
/// @}
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index 28e1ab1c87..b510daf331 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -26,8 +26,8 @@
namespace llvm {
namespace bitc {
enum StandardWidths {
- BlockIDWidth = 8, // We use VBR-8 for block IDs.
- CodeLenWidth = 4, // Codelen are VBR-4.
+ BlockIDWidth = 8, // We use VBR-8 for block IDs.
+ CodeLenWidth = 4, // Codelen are VBR-4.
BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block.
};
@@ -69,10 +69,11 @@ namespace bitc {
enum BlockInfoCodes {
// DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
// block, instead of the BlockInfo block.
-
- BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#]
- BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name]
- BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: [id, name]
+
+ BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#]
+ BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name]
+ BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME:
+ // [id, name]
};
} // End bitc namespace
@@ -99,7 +100,7 @@ public:
explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
: Val(Data), IsLiteral(false), Enc(E) {}
- bool isLiteral() const { return IsLiteral; }
+ bool isLiteral() const { return IsLiteral; }
bool isEncoding() const { return !IsLiteral; }
// Accessors for literals.
@@ -138,18 +139,18 @@ public:
if (C >= 'a' && C <= 'z') return C-'a';
if (C >= 'A' && C <= 'Z') return C-'A'+26;
if (C >= '0' && C <= '9') return C-'0'+26+26;
- if (C == '.') return 62;
- if (C == '_') return 63;
+ if (C == '.') return 62;
+ if (C == '_') return 63;
llvm_unreachable("Not a value Char6 character!");
}
static char DecodeChar6(unsigned V) {
assert((V & ~63) == 0 && "Not a Char6 encoded character!");
- if (V < 26) return V+'a';
- if (V < 26+26) return V-26+'A';
+ if (V < 26) return V+'a';
+ if (V < 26+26) return V-26+'A';
if (V < 26+26+10) return V-26-26+'0';
- if (V == 62) return '.';
- if (V == 63) return '_';
+ if (V == 62) return '.';
+ if (V == 63) return '_';
llvm_unreachable("Not a value Char6 character!");
}
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 840f57e752..5b60f72e30 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -35,12 +35,12 @@ public:
unsigned BlockID;
std::vector<BitCodeAbbrev*> Abbrevs;
std::string Name;
-
+
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
private:
OwningPtr<StreamableMemoryObject> BitcodeBytes;
-
+
std::vector<BlockInfo> BlockInfoRecords;
/// IgnoreBlockInfoNames - This is set to true if we don't care about the
@@ -86,7 +86,7 @@ public:
/// name information.
void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
-
+
//===--------------------------------------------------------------------===//
// Block Manipulation
//===--------------------------------------------------------------------===//
@@ -95,7 +95,7 @@ public:
/// block info block for this Bitstream. We only process it for the first
/// cursor that walks over it.
bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
-
+
/// getBlockInfo - If there is block info for the specified ID, return it,
/// otherwise return null.
const BlockInfo *getBlockInfo(unsigned BlockID) const {
@@ -126,74 +126,74 @@ class BitstreamCursor {
friend class Deserializer;
BitstreamReader *BitStream;
size_t NextChar;
-
+
/// CurWord - This is the current data we have pulled from the stream but have
/// not returned to the client.
uint32_t CurWord;
-
+
/// BitsInCurWord - This is the number of bits in CurWord that are valid. This
/// is always from [0...31] inclusive.
unsigned BitsInCurWord;
-
+
// CurCodeSize - This is the declared size of code values used for the current
// block, in bits.
unsigned CurCodeSize;
-
+
/// CurAbbrevs - Abbrevs installed at in this block.
std::vector<BitCodeAbbrev*> CurAbbrevs;
-
+
struct Block {
unsigned PrevCodeSize;
std::vector<BitCodeAbbrev*> PrevAbbrevs;
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
};
-
+
/// BlockScope - This tracks the codesize of parent blocks.
SmallVector<Block, 8> BlockScope;
-
+
public:
BitstreamCursor() : BitStream(0), NextChar(0) {
}
BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
operator=(RHS);
}
-
+
explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
}
-
+
void init(BitstreamReader &R) {
freeState();
-
+
BitStream = &R;
NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
}
-
+
~BitstreamCursor() {
freeState();
}
-
+
void operator=(const BitstreamCursor &RHS) {
freeState();
-
+
BitStream = RHS.BitStream;
NextChar = RHS.NextChar;
CurWord = RHS.CurWord;
BitsInCurWord = RHS.BitsInCurWord;
CurCodeSize = RHS.CurCodeSize;
-
+
// Copy abbreviations, and bump ref counts.
CurAbbrevs = RHS.CurAbbrevs;
for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
i != e; ++i)
CurAbbrevs[i]->addRef();
-
+
// Copy block scope and bump ref counts.
BlockScope = RHS.BlockScope;
for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
@@ -204,14 +204,14 @@ public:
Abbrevs[i]->addRef();
}
}
-
+
void freeState() {
// Free all the Abbrevs.
for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
i != e; ++i)
CurAbbrevs[i]->dropRef();
CurAbbrevs.clear();
-
+
// Free all the Abbrevs in the block scope.
for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
S != e; ++S) {
@@ -222,10 +222,10 @@ public:
}
BlockScope.clear();
}
-
+
/// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
-
+
bool isEndPos(size_t pos) {
return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
}
@@ -255,37 +255,37 @@ public:
bool AtEndOfStream() {
return isEndPos(NextChar) && BitsInCurWord == 0;
}
-
+
/// GetCurrentBitNo - Return the bit # of the bit we are reading.
uint64_t GetCurrentBitNo() const {
return NextChar*CHAR_BIT - BitsInCurWord;
}
-
+
BitstreamReader *getBitStreamReader() {
return BitStream;
}
const BitstreamReader *getBitStreamReader() const {
return BitStream;
}
-
-
+
+
/// JumpToBit - Reset the stream to the specified bit number.
void JumpToBit(uint64_t BitNo) {
uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
assert(canSkipToPos(ByteNo) && "Invalid location");
-
+
// Move the cursor to the right word.
NextChar = ByteNo;
BitsInCurWord = 0;
CurWord = 0;
-
+
// Skip over any bits that are already consumed.
if (WordBitNo)
Read(static_cast<unsigned>(WordBitNo));
}
-
-
+
+
uint32_t Read(unsigned NumBits) {
assert(NumBits <= 32 && "Cannot return more than 32 bits!");
// If the field is fully contained by CurWord, return it quickly.
@@ -473,7 +473,7 @@ private:
// If the abbrev specifies the literal value to use, use it.
Vals.push_back(Op.getLiteralValue());
}
-
+
void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
SmallVectorImpl<uint64_t> &Vals) {
assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
@@ -494,13 +494,13 @@ private:
}
public:
- /// getAbbrev - Return the abbreviation for the specified AbbrevId.
+ /// getAbbrev - Return the abbreviation for the specified AbbrevId.
const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
return CurAbbrevs[AbbrevNo];
}
-
+
unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
const char **BlobStart = 0, unsigned *BlobLen = 0) {
if (AbbrevID == bitc::UNABBREV_RECORD) {
@@ -516,7 +516,7 @@ public:
for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
if (Op.isLiteral()) {
- ReadAbbreviatedLiteral(Op, Vals);
+ ReadAbbreviatedLiteral(Op, Vals);
} else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
// Array case. Read the number of elements as a vbr6.
unsigned NumElts = ReadVBR(6);
@@ -535,7 +535,7 @@ public:
// Figure out where the end of this blob will be including tail padding.
size_t NewEnd = NextChar+((NumElts+3)&~3);
-
+
// If this would read off the end of the bitcode file, just set the
// record to empty and return.
if (!canSkipToPos(NewEnd)) {
@@ -543,7 +543,7 @@ public:
NextChar = BitStream->getBitcodeBytes().getExtent();
break;
}
-
+
// Otherwise, read the number of bytes. If we can return a reference to
// the data, do so to avoid copying it.
if (BlobStart) {
@@ -571,7 +571,7 @@ public:
return ReadRecord(AbbrevID, Vals, &BlobStart, &BlobLen);
}
-
+
//===--------------------------------------------------------------------===//
// Abbrev Processing
//===--------------------------------------------------------------------===//
@@ -594,14 +594,14 @@ public:
}
CurAbbrevs.push_back(Abbv);
}
-
+
public:
bool ReadBlockInfoBlock() {
// If this is the second stream to get to the block info block, skip it.
if (BitStream->hasBlockInfoRecords())
return SkipBlock();
-
+
if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
SmallVector<uint64_t, 64> Record;
@@ -662,7 +662,7 @@ public:
}
}
};
-
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index dea118f98e..2e8c9f46b8 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -273,7 +273,7 @@ public:
private:
/// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
- /// record. This is a no-op, since the abbrev specifies the literal to use.
+ /// record. This is a no-op, since the abbrev specifies the literal to use.
template<typename uintty>
void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
assert(Op.isLiteral() && "Not a literal");
@@ -282,13 +282,13 @@ private:
assert(V == Op.getLiteralValue() &&
"Invalid abbrev for record!");
}
-
+
/// EmitAbbreviatedField - Emit a single scalar field value with the specified
/// encoding.
template<typename uintty>
void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
-
+
// Encode the value as we are commanded.
switch (Op.getEncoding()) {
default: llvm_unreachable("Unknown encoding!");
@@ -305,7 +305,7 @@ private:
break;
}
}
-
+
/// EmitRecordWithAbbrevImpl - This is the core implementation of the record
/// emission code. If BlobData is non-null, then it specifies an array of
/// data that should be emitted as part of the Blob or Array operand that is
@@ -341,11 +341,11 @@ private:
"Blob data and record entries specified for array!");
// Emit a vbr6 to indicate the number of elements present.
EmitVBR(static_cast<uint32_t>(BlobLen), 6);
-
+
// Emit each field.
for (unsigned i = 0; i != BlobLen; ++i)
EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
-
+
// Know that blob data is consumed for assertion below.
BlobData = 0;
} else {
@@ -359,7 +359,7 @@ private:
} else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
// If this record has blob data, emit it, otherwise we must have record
// entries to encode this way.
-
+
// Emit a vbr6 to indicate the number of elements present.
if (BlobData) {
EmitVBR(static_cast<uint32_t>(BlobLen), 6);
@@ -368,7 +368,7 @@ private:
} else {
EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
}
-
+
// Flush to a 32-bit alignment boundary.
FlushToWord();
@@ -376,7 +376,7 @@ private:
if (BlobData) {
for (unsigned i = 0; i != BlobLen; ++i)
WriteByte((unsigned char)BlobData[i]);
-
+
// Know that blob data is consumed for assertion below.
BlobData = 0;
} else {
@@ -399,7 +399,7 @@ private:
assert(BlobData == 0 &&
"Blob data specified for record that doesn't use it!");
}
-
+
public:
/// EmitRecord - Emit the specified record to the stream, using an abbrev if
@@ -420,10 +420,10 @@ public:
// Insert the code into Vals to treat it uniformly.
Vals.insert(Vals.begin(), Code);
-
+
EmitRecordWithAbbrev(Abbrev, Vals);
}
-
+
/// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
/// Unlike EmitRecord, the code for the record should be included in Vals as
/// the first entry.
@@ -431,7 +431,7 @@ public:
void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
}
-
+
/// EmitRecordWithBlob - Emit the specified record to the stream, using an
/// abbrev that includes a blob at the end. The blob data to emit is
/// specified by the pointer and length specified at the end. In contrast to
@@ -461,7 +461,7 @@ public:
return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
ArrayLen));
}
-
+
//===--------------------------------------------------------------------===//
// Abbrev Emission
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index c1dc190304..511e3a377a 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -31,16 +31,16 @@ namespace bitc {
PARAMATTR_BLOCK_ID,
UNUSED_ID1,
-
+
CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,
-
+
UNUSED_ID2,
-
+
VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
METADATA_ATTACHMENT_ID,
-
+
TYPE_BLOCK_ID_NEW,
USELIST_BLOCK_ID
@@ -93,9 +93,9 @@ namespace bitc {
TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty,
// paramty x N]
-
+
TYPE_CODE_HALF = 10, // HALF
-
+
TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty]
TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty]
@@ -109,7 +109,7 @@ namespace bitc {
TYPE_CODE_METADATA = 16, // METADATA
TYPE_CODE_X86_MMX = 17, // X86 MMX
-
+
TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N]
TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N]
TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N]
@@ -234,7 +234,7 @@ namespace bitc {
OBO_NO_SIGNED_WRAP = 1
};
- /// PossiblyExactOperatorOptionalFlags - Flags for serializing
+ /// PossiblyExactOperatorOptionalFlags - Flags for serializing
/// PossiblyExactOperator's SubclassOptionalData contents.
enum PossiblyExactOperatorOptionalFlags {
PEO_EXACT = 0
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index 053f4eb326..699cea331c 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -47,6 +47,10 @@ namespace CallingConv {
// GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
GHC = 10,
+ // HiPE - Calling convention used by the High-Performance Erlang Compiler
+ // (HiPE).
+ HiPE = 11,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 02c5f422ce..2ee7d4a79e 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -408,10 +408,8 @@ namespace llvm {
/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
unsigned GetSizeOfEncodedValue(unsigned Encoding) const;
- /// EmitReference - Emit a reference to a label with a specified encoding.
- ///
- void EmitReference(const MCSymbol *Sym, unsigned Encoding) const;
- void EmitReference(const GlobalValue *GV, unsigned Encoding) const;
+ /// EmitTTypeReference - Emit reference to a ttype global with a specified encoding.
+ void EmitTTypeReference(const GlobalValue *GV, unsigned Encoding) const;
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of
/// its section. This can be done with a special directive if the target
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index 2d2db78144..e4386fc8e2 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -135,7 +135,7 @@ public:
// initPacketizerState - perform initialization before packetizing
// an instruction. This function is supposed to be overrided by
// the target dependent packetizer.
- virtual void initPacketizerState(void) { return; }
+ virtual void initPacketizerState() { return; }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
virtual bool ignorePseudoInstruction(MachineInstr *I,
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
index 854ba06209..3c60ad1f29 100644
--- a/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -149,16 +149,13 @@ public:
/// PhysRegInfo - Information about a physical register used by a set of
/// operands.
struct PhysRegInfo {
- /// Clobbers - Reg or an overlapping register is defined, or a regmask
+ /// Clobbers - Reg or an overlapping register is defined, or a regmask
/// clobbers Reg.
bool Clobbers;
/// Defines - Reg or a super-register is defined.
bool Defines;
- /// DefinesOverlap - Reg or an overlapping register is defined.
- bool DefinesOverlap;
-
/// Reads - Read or a super-register is read.
bool Reads;
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 6b88d4a949..fc73a3d609 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -372,7 +372,7 @@ public:
/// getCurrentCallSite - Get the call site currently being processed, if any.
/// return zero if none.
- unsigned getCurrentCallSite(void) { return CurCallSite; }
+ unsigned getCurrentCallSite() { return CurCallSite; }
/// getTypeInfos - Return a reference to the C++ typeinfo for the current
/// function.
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 31bd606f93..88f347e4b5 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -181,7 +181,7 @@ public:
return Queue.begin() + idx;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump();
#endif
};
@@ -202,6 +202,10 @@ protected:
RegisterClassInfo *RegClassInfo;
MachineSchedStrategy *SchedImpl;
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
/// Ordered list of DAG postprocessing steps.
std::vector<ScheduleDAGMutation*> Mutations;
@@ -226,6 +230,10 @@ protected:
IntervalPressure BotPressure;
RegPressureTracker BotRPTracker;
+ /// Record the next node in a scheduled cluster.
+ const SUnit *NextClusterPred;
+ const SUnit *NextClusterSucc;
+
#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
@@ -236,24 +244,35 @@ public:
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
- RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
- CurrentBottom(), BotRPTracker(BotPressure) {
+ Topo(SUnits, &ExitSU), RPTracker(RegPressure), CurrentTop(),
+ TopRPTracker(TopPressure), CurrentBottom(), BotRPTracker(BotPressure),
+ NextClusterPred(NULL), NextClusterSucc(NULL) {
#ifndef NDEBUG
NumInstrsScheduled = 0;
#endif
}
virtual ~ScheduleDAGMI() {
+ DeleteContainerPointers(Mutations);
delete SchedImpl;
}
/// Add a postprocessing step to the DAG builder.
/// Mutations are applied in the order that they are added after normal DAG
/// building and before MachineSchedStrategy initialization.
+ ///
+ /// ScheduleDAGMI takes ownership of the Mutation object.
void addMutation(ScheduleDAGMutation *Mutation) {
Mutations.push_back(Mutation);
}
+ /// \brief Add a DAG edge to the given SU with the given predecessor
+ /// dependence data.
+ ///
+ /// \returns true if the edge may be added without creating a cycle OR if an
+ /// equivalent edge already existed (false indicates failure).
+ bool addEdge(SUnit *SuccSU, const SDep &PredDep);
+
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
@@ -285,6 +304,10 @@ public:
return RegionCriticalPSets;
}
+ const SUnit *getNextClusterPred() const { return NextClusterPred; }
+
+ const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
+
protected:
// Top-Level entry points for the schedule() driver...
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 08d316992e..8752e67a79 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -129,10 +129,12 @@ private:
/// isReserved - Returns true if a register is reserved. It is never "unused".
bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
- /// isUsed / isUnused - Test if a register is currently being used.
+ /// isUsed - Test if a register is currently being used. When called by the
+ /// isAliasUsed function, we only check isReserved if this is the original
+ /// register, not an alias register.
///
- bool isUsed(unsigned Reg) const {
- return !RegsAvailable.test(Reg) || isReserved(Reg);
+ bool isUsed(unsigned Reg, bool CheckReserved = true) const {
+ return !RegsAvailable.test(Reg) || (CheckReserved && isReserved(Reg));
}
/// isAliasUsed - Is Reg or an alias currently in use?
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 7e0ca1478e..016722e7f4 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -57,7 +57,8 @@ namespace llvm {
Barrier, ///< An unknown scheduling barrier.
MayAliasMem, ///< Nonvolatile load/Store instructions that may alias.
MustAliasMem, ///< Nonvolatile load/Store instructions that must alias.
- Artificial ///< Arbitrary weak DAG edge (no actual dependence).
+ Artificial, ///< Arbitrary weak DAG edge (no actual dependence).
+ Cluster ///< Weak DAG edge linking a chain of clustered instrs.
};
private:
@@ -200,12 +201,26 @@ namespace llvm {
return getKind() == Order && Contents.OrdKind == MustAliasMem;
}
+ /// isWeak - Test if this is a weak dependence. Weak dependencies are
+ /// considered DAG edges for height computation and other heuristics, but do
+ /// not force ordering. Breaking a weak edge may require the scheduler to
+ /// compensate, for example by inserting a copy.
+ bool isWeak() const {
+ return getKind() == Order && Contents.OrdKind == Cluster;
+ }
+
/// isArtificial - Test if this is an Order dependence that is marked
/// as "artificial", meaning it isn't necessary for correctness.
bool isArtificial() const {
return getKind() == Order && Contents.OrdKind == Artificial;
}
+ /// isCluster - Test if this is an Order dependence that is marked
+ /// as "cluster", meaning it is artificial and wants to be adjacent.
+ bool isCluster() const {
+ return getKind() == Order && Contents.OrdKind == Cluster;
+ }
+
/// isAssignedRegDep - Test if this is a Data dependence that is
/// associated with a register.
bool isAssignedRegDep() const {
@@ -267,6 +282,8 @@ namespace llvm {
unsigned NumSuccs; // # of SDep::Data sucss.
unsigned NumPredsLeft; // # of preds not scheduled.
unsigned NumSuccsLeft; // # of succs not scheduled.
+ unsigned WeakPredsLeft; // # of weak preds not scheduled.
+ unsigned WeakSuccsLeft; // # of weak succs not scheduled.
unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use.
unsigned short Latency; // Node latency.
bool isVRegCycle : 1; // May use and def the same vreg.
@@ -301,12 +318,12 @@ namespace llvm {
SUnit(SDNode *node, unsigned nodenum)
: Node(node), Instr(0), OrigNode(0), SchedClass(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
- isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
- isPending(false), isAvailable(false), isScheduled(false),
- isScheduleHigh(false), isScheduleLow(false), isCloned(false),
- SchedulingPref(Sched::None),
+ NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
+ Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
+ isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
+ hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+ isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+ isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -315,12 +332,12 @@ namespace llvm {
SUnit(MachineInstr *instr, unsigned nodenum)
: Node(0), Instr(instr), OrigNode(0), SchedClass(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
- isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
- isPending(false), isAvailable(false), isScheduled(false),
- isScheduleHigh(false), isScheduleLow(false), isCloned(false),
- SchedulingPref(Sched::None),
+ NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
+ Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
+ isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
+ hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+ isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+ isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -328,12 +345,12 @@ namespace llvm {
SUnit()
: Node(0), Instr(0), OrigNode(0), SchedClass(0), NodeNum(~0u),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
- NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
- isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
- isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
- isPending(false), isAvailable(false), isScheduled(false),
- isScheduleHigh(false), isScheduleLow(false), isCloned(false),
- SchedulingPref(Sched::None),
+ NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
+ Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
+ isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
+ hasPhysRegClobbers(false), isPending(false), isAvailable(false),
+ isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
+ isCloned(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -372,7 +389,7 @@ namespace llvm {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
- bool addPred(const SDep &D);
+ bool addPred(const SDep &D, bool Required = true);
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
@@ -654,6 +671,7 @@ namespace llvm {
class ScheduleDAGTopologicalSort {
/// SUnits - A reference to the ScheduleDAG's SUnits.
std::vector<SUnit> &SUnits;
+ SUnit *ExitSU;
/// Index2Node - Maps topological index to the node number.
std::vector<int> Index2Node;
@@ -675,7 +693,7 @@ namespace llvm {
void Allocate(int n, int index);
public:
- explicit ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits);
+ ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 9849e92f7d..f1b3065e7c 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -55,13 +55,12 @@ public:
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const;
- /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference
- /// to the specified global variable from exception handling information.
- ///
+ /// getTTypeGlobalReference - Return an MCExpr to use for a reference to the
+ /// specified type info global variable from exception handling information.
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
@@ -103,12 +102,12 @@ public:
virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV,
Mangler *) const;
- /// getExprForDwarfGlobalReference - The mach-o version of this method
+ /// getTTypeGlobalReference - The mach-o version of this method
/// defaults to returning a stub reference.
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index 7fecf4c7b4..b3cb449da1 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -65,6 +65,9 @@ public:
/// true for things like constant expressions that could divide by zero.
bool canTrap() const;
+ /// isThreadDependent - Return true if the value can vary between threads.
+ bool isThreadDependent() const;
+
/// isConstantUsed - Return true if the constant has users other than constant
/// exprs and other dangling things.
bool isConstantUsed() const;
@@ -97,7 +100,15 @@ public:
/// 'this' is a constant expr.
Constant *getAggregateElement(unsigned Elt) const;
Constant *getAggregateElement(Constant *Elt) const;
-
+
+ /// getSplatValue - If this is a splat vector constant, meaning that all of
+ /// the elements have the same value, return that value. Otherwise return 0.
+ Constant *getSplatValue() const;
+
+ /// If this constant is an integer, return its value; otherwise it must be a
+ /// vector of constant integers, all equal, and the common value is returned.
+ const APInt &getUniqueInteger() const;
+
/// destroyConstant - Called if some element of this constant is no longer
/// valid. At this point only other constants may be on the use_list for this
/// constant. Any constants on our Use list must also be destroy'd. The
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 7f94ef464e..456c814596 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
//
/// @file
-/// This file contains the declarations for the subclasses of Constant,
+/// This file contains the declarations for the subclasses of Constant,
/// which represent the different flavors of constant values that live in LLVM.
-/// Note that Constants are immutable (once created they never change) and are
+/// Note that Constants are immutable (once created they never change) and are
/// fully shared by structural equivalence. This means that two structurally
/// equivalent constants will always have the same address. Constant's are
/// created on demand as needed and never deleted: thus clients don't have to
@@ -44,7 +44,7 @@ template<class ConstantClass, class TypeClass>
struct ConvertConstantType;
//===----------------------------------------------------------------------===//
-/// This is the shared class of boolean and integer constants. This class
+/// This is the shared class of boolean and integer constants. This class
/// represents both boolean and integral constants.
/// @brief Class for constant integers.
class ConstantInt : public Constant {
@@ -63,11 +63,11 @@ public:
static ConstantInt *getFalse(LLVMContext &Context);
static Constant *getTrue(Type *Ty);
static Constant *getFalse(Type *Ty);
-
+
/// If Ty is a vector type, return a Constant with a splat of the given
/// value. Otherwise return a ConstantInt for the given value.
static Constant *get(Type *Ty, uint64_t V, bool isSigned = false);
-
+
/// Return a ConstantInt with the specified integer value for the specified
/// type. If the type is wider than 64 bits, the value will be zero-extended
/// to fit the type, unless isSigned is true, in which case the value will
@@ -84,27 +84,27 @@ public:
/// @brief Get a ConstantInt for a specific signed value.
static ConstantInt *getSigned(IntegerType *Ty, int64_t V);
static Constant *getSigned(Type *Ty, int64_t V);
-
+
/// Return a ConstantInt with the specified value and an implied Type. The
/// type is the integer type that corresponds to the bit width of the value.
static ConstantInt *get(LLVMContext &Context, const APInt &V);
/// Return a ConstantInt constructed from the string strStart with the given
- /// radix.
+ /// radix.
static ConstantInt *get(IntegerType *Ty, StringRef Str,
uint8_t radix);
-
+
/// If Ty is a vector type, return a Constant with a splat of the given
/// value. Otherwise return a ConstantInt for the given value.
static Constant *get(Type* Ty, const APInt& V);
-
+
/// Return the constant as an APInt value reference. This allows clients to
/// obtain a copy of the value, with all its precision in tact.
/// @brief Return the constant's value.
inline const APInt &getValue() const {
return Val;
}
-
+
/// getBitWidth - Return the bitwidth of this constant.
unsigned getBitWidth() const { return Val.getBitWidth(); }
@@ -126,8 +126,8 @@ public:
return Val.getSExtValue();
}
- /// A helper method that can be used to determine if the constant contained
- /// within is equal to a constant. This only works for very small values,
+ /// A helper method that can be used to determine if the constant contained
+ /// within is equal to a constant. This only works for very small values,
/// because this is all that can be represented with all types.
/// @brief Determine if this constant's value is same as an unsigned char.
bool equalsInt(uint64_t V) const {
@@ -141,11 +141,11 @@ public:
return reinterpret_cast<IntegerType*>(Value::getType());
}
- /// This static method returns true if the type Ty is big enough to
- /// represent the value V. This can be used to avoid having the get method
+ /// This static method returns true if the type Ty is big enough to
+ /// represent the value V. This can be used to avoid having the get method
/// assert when V is larger than Ty can represent. Note that there are two
/// versions of this method, one for unsigned and one for signed integers.
- /// Although ConstantInt canonicalizes everything to an unsigned integer,
+ /// Although ConstantInt canonicalizes everything to an unsigned integer,
/// the signed version avoids callers having to convert a signed quantity
/// to the appropriate unsigned type before calling the method.
/// @returns true if V is a valid value for type Ty
@@ -162,7 +162,7 @@ public:
return Val == 0;
}
- /// This is just a convenience method to make client code smaller for a
+ /// This is just a convenience method to make client code smaller for a
/// common case. It also correctly performs the comparison without the
/// potential for an assertion from getZExtValue().
/// @brief Determine if the value is one.
@@ -174,17 +174,17 @@ public:
/// to true.
/// @returns true iff this constant's bits are all set to true.
/// @brief Determine if the value is all ones.
- bool isMinusOne() const {
+ bool isMinusOne() const {
return Val.isAllOnesValue();
}
/// This function will return true iff this constant represents the largest
/// value that may be represented by the constant's type.
- /// @returns true iff this is the largest value that may be represented
+ /// @returns true iff this is the largest value that may be represented
/// by this type.
/// @brief Determine if the value is maximal.
bool isMaxValue(bool isSigned) const {
- if (isSigned)
+ if (isSigned)
return Val.isMaxSignedValue();
else
return Val.isMaxValue();
@@ -192,11 +192,11 @@ public:
/// This function will return true iff this constant represents the smallest
/// value that may be represented by this constant's type.
- /// @returns true if this is the smallest value that may be represented by
+ /// @returns true if this is the smallest value that may be represented by
/// this type.
/// @brief Determine if the value is minimal.
bool isMinValue(bool isSigned) const {
- if (isSigned)
+ if (isSigned)
return Val.isMinSignedValue();
else
return Val.isMinValue();
@@ -248,7 +248,7 @@ public:
/// method returns the negative zero constant for floating point or vector
/// floating point types; for all other types, it returns the null value.
static Constant *getZeroValueForNegation(Type *Ty);
-
+
/// get() - This returns a ConstantFP, or a vector containing a splat of a
/// ConstantFP, for the specified value in the specified type. This should
/// only be used for simple constant values like 2.0/1.0 etc, that are
@@ -258,7 +258,7 @@ public:
static ConstantFP *get(LLVMContext &Context, const APFloat &V);
static ConstantFP *getNegativeZero(Type* Ty);
static ConstantFP *getInfinity(Type *Ty, bool Negative = false);
-
+
/// isValueValidForType - return true if Ty is big enough to represent V.
static bool isValueValidForType(Type *Ty, const APFloat &V);
inline const APFloat &getValueAPF() const { return Val; }
@@ -308,7 +308,7 @@ protected:
}
public:
static ConstantAggregateZero *get(Type *Ty);
-
+
virtual void destroyConstant();
/// getSequentialElement - If this CAZ has array or vector type, return a zero
@@ -346,7 +346,7 @@ protected:
public:
// ConstantArray accessors
static Constant *get(ArrayType *T, ArrayRef<Constant*> V);
-
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -392,7 +392,7 @@ public:
static Constant *getAnon(ArrayRef<Constant*> V, bool Packed = false) {
return get(getTypeForElements(V, Packed), V);
}
- static Constant *getAnon(LLVMContext &Ctx,
+ static Constant *getAnon(LLVMContext &Ctx,
ArrayRef<Constant*> V, bool Packed = false) {
return get(getTypeForElements(Ctx, V, Packed), V);
}
@@ -405,7 +405,7 @@ public:
static StructType *getTypeForElements(LLVMContext &Ctx,
ArrayRef<Constant*> V,
bool Packed = false);
-
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -443,11 +443,11 @@ protected:
public:
// ConstantVector accessors
static Constant *get(ArrayRef<Constant*> V);
-
+
/// getSplat - Return a ConstantVector with the specified constant in each
/// element.
static Constant *getSplat(unsigned NumElts, Constant *Elt);
-
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -512,7 +512,7 @@ public:
return V->getValueID() == ConstantPointerNullVal;
}
};
-
+
//===----------------------------------------------------------------------===//
/// ConstantDataSequential - A vector or array constant whose element type is a
/// simple 1/2/4/8-byte integer or float/double, and whose elements are just
@@ -527,7 +527,7 @@ class ConstantDataSequential : public Constant {
/// DataElements - A pointer to the bytes underlying this constant (which is
/// owned by the uniquing StringMap).
const char *DataElements;
-
+
/// Next - This forms a link list of ConstantDataSequential nodes that have
/// the same value but different type. For example, 0,0,0,1 could be a 4
/// element array of i8, or a 1-element array of i32. They'll both end up in
@@ -539,7 +539,7 @@ protected:
explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
: Constant(ty, VT, 0, 0), DataElements(Data), Next(0) {}
~ConstantDataSequential() { delete Next; }
-
+
static Constant *getImpl(StringRef Bytes, Type *Ty);
protected:
@@ -548,13 +548,13 @@ protected:
return User::operator new(s, 0);
}
public:
-
+
/// isElementTypeCompatible - Return true if a ConstantDataSequential can be
/// formed with a vector or array of the specified element type.
/// ConstantDataArray only works with normal float and int types that are
/// stored densely in memory, not with things like i42 or x86_f80.
static bool isElementTypeCompatible(const Type *Ty);
-
+
/// getElementAsInteger - If this is a sequential container of integers (of
/// any size), return the specified element in the low bits of a uint64_t.
uint64_t getElementAsInteger(unsigned i) const;
@@ -566,26 +566,26 @@ public:
/// getElementAsFloat - If this is an sequential container of floats, return
/// the specified element as a float.
float getElementAsFloat(unsigned i) const;
-
+
/// getElementAsDouble - If this is an sequential container of doubles, return
/// the specified element as a double.
double getElementAsDouble(unsigned i) const;
-
+
/// getElementAsConstant - Return a Constant for a specified index's element.
/// Note that this has to compute a new constant to return, so it isn't as
/// efficient as getElementAsInteger/Float/Double.
Constant *getElementAsConstant(unsigned i) const;
-
+
/// getType - Specialize the getType() method to always return a
/// SequentialType, which reduces the amount of casting needed in parts of the
/// compiler.
inline SequentialType *getType() const {
return reinterpret_cast<SequentialType*>(Value::getType());
}
-
+
/// getElementType - Return the element type of the array/vector.
Type *getElementType() const;
-
+
/// getNumElements - Return the number of elements in the array or vector.
unsigned getNumElements() const;
@@ -594,14 +594,14 @@ public:
/// byte.
uint64_t getElementByteSize() const;
-
+
/// isString - This method returns true if this is an array of i8.
bool isString() const;
-
+
/// isCString - This method returns true if the array "isString", ends with a
/// nul byte, and does not contains any other nul bytes.
bool isCString() const;
-
+
/// getAsString - If this array is isString(), then this method returns the
/// array as a StringRef. Otherwise, it asserts out.
///
@@ -609,7 +609,7 @@ public:
assert(isString() && "Not a string");
return getRawDataValues();
}
-
+
/// getAsCString - If this array is isCString(), then this method returns the
/// array (without the trailing null byte) as a StringRef. Otherwise, it
/// asserts out.
@@ -619,14 +619,14 @@ public:
StringRef Str = getAsString();
return Str.substr(0, Str.size()-1);
}
-
+
/// getRawDataValues - Return the raw, underlying, bytes of this data. Note
/// that this is an extremely tricky thing to work with, as it exposes the
/// host endianness of the data elements.
StringRef getRawDataValues() const;
-
+
virtual void destroyConstant();
-
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
static bool classof(const Value *V) {
@@ -656,7 +656,7 @@ protected:
return User::operator new(s, 0);
}
public:
-
+
/// get() constructors - Return a constant with array type with an element
/// count and element type matching the ArrayRef passed in. Note that this
/// can return a ConstantAggregateZero object.
@@ -666,7 +666,7 @@ public:
static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
-
+
/// getString - This method constructs a CDS and initializes it with a text
/// string. The default behavior (AddNull==true) causes a null terminator to
/// be placed at the end of the array (increasing the length of the string by
@@ -681,14 +681,14 @@ public:
inline ArrayType *getType() const {
return reinterpret_cast<ArrayType*>(Value::getType());
}
-
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
static bool classof(const Value *V) {
return V->getValueID() == ConstantDataArrayVal;
}
};
-
+
//===----------------------------------------------------------------------===//
/// ConstantDataVector - A vector constant whose element type is a simple
/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
@@ -708,7 +708,7 @@ protected:
return User::operator new(s, 0);
}
public:
-
+
/// get() constructors - Return a constant with vector type with an element
/// count and element type matching the ArrayRef passed in. Note that this
/// can return a ConstantAggregateZero object.
@@ -718,7 +718,7 @@ public:
static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
-
+
/// getSplat - Return a ConstantVector with the specified constant in each
/// element. The specified constant has to be a of a compatible type (i8/i16/
/// i32/i64/float/double) and must be a ConstantFP or ConstantInt.
@@ -727,14 +727,14 @@ public:
/// getSplatValue - If this is a splat constant, meaning that all of the
/// elements have the same value, return that value. Otherwise return NULL.
Constant *getSplatValue() const;
-
+
/// getType - Specialize the getType() method to always return a VectorType,
/// which reduces the amount of casting needed in parts of the compiler.
///
inline VectorType *getType() const {
return reinterpret_cast<VectorType*>(Value::getType());
}
-
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
static bool classof(const Value *V) {
@@ -753,20 +753,20 @@ class BlockAddress : public Constant {
public:
/// get - Return a BlockAddress for the specified function and basic block.
static BlockAddress *get(Function *F, BasicBlock *BB);
-
+
/// get - Return a BlockAddress for the specified basic block. The basic
/// block must be embedded into a function.
static BlockAddress *get(BasicBlock *BB);
-
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
+
Function *getFunction() const { return (Function*)Op<0>().get(); }
BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
-
+
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Value *V) {
return V->getValueID() == BlockAddressVal;
@@ -779,7 +779,7 @@ struct OperandTraits<BlockAddress> :
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
-
+
//===----------------------------------------------------------------------===//
/// ConstantExpr - a constant value that is initialized with an expression using
@@ -809,14 +809,14 @@ public:
/// getAlignOf constant expr - computes the alignment of a type in a target
/// independent way (Note: the return type is an i64).
static Constant *getAlignOf(Type *Ty);
-
+
/// getSizeOf constant expr - computes the (alloc) size of a type (in
/// address-units, not bits) in a target independent way (Note: the return
/// type is an i64).
///
static Constant *getSizeOf(Type *Ty);
- /// getOffsetOf constant expr - computes the offset of a struct field in a
+ /// getOffsetOf constant expr - computes the offset of a struct field in a
/// target independent way (Note: the return type is an i64).
///
static Constant *getOffsetOf(StructType *STy, unsigned FieldNo);
@@ -825,7 +825,7 @@ public:
/// which supports any aggregate type, and any Constant index.
///
static Constant *getOffsetOf(Type *Ty, Constant *FieldNo);
-
+
static Constant *getNeg(Constant *C, bool HasNUW = false, bool HasNSW =false);
static Constant *getFNeg(Constant *C);
static Constant *getNot(Constant *C);
@@ -931,7 +931,7 @@ public:
Type *Ty ///< The type to zext or bitcast C to
);
- // @brief Create a SExt or BitCast cast constant expression
+ // @brief Create a SExt or BitCast cast constant expression
static Constant *getSExtOrBitCast(
Constant *C, ///< The constant to sext or bitcast
Type *Ty ///< The type to sext or bitcast C to
@@ -951,14 +951,14 @@ public:
/// @brief Create a ZExt, Bitcast or Trunc for integer -> integer casts
static Constant *getIntegerCast(
- Constant *C, ///< The integer constant to be casted
+ Constant *C, ///< The integer constant to be casted
Type *Ty, ///< The integer type to cast to
bool isSigned ///< Whether C should be treated as signed or not
);
/// @brief Create a FPExt, Bitcast or FPTrunc for fp -> fp casts
static Constant *getFPCast(
- Constant *C, ///< The integer constant to be casted
+ Constant *C, ///< The integer constant to be casted
Type *Ty ///< The integer type to cast to
);
@@ -1062,7 +1062,7 @@ public:
/// getWithOperandReplaced - Return a constant expression identical to this
/// one, but with the specified operand set to the specified value.
Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const;
-
+
/// getWithOperands - This returns the current constant expression with the
/// operands replaced with the specified values. The specified array must
/// have the same number of operands as our current one.
@@ -1076,6 +1076,16 @@ public:
/// current one.
Constant *getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const;
+ /// getAsInstruction - Returns an Instruction which implements the same operation
+ /// as this ConstantExpr. The instruction is not linked to any basic block.
+ ///
+ /// A better approach to this could be to have a constructor for Instruction
+ /// which would take a ConstantExpr parameter, but that would have spread
+ /// implementation details of ConstantExpr outside of Constants.cpp, which
+ /// would make it harder to remove ConstantExprs altogether
+ /// (http://llvm.org/bugs/show_bug.cgi?id=10368).
+ Instruction *getAsInstruction();
+
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -1083,7 +1093,7 @@ public:
static inline bool classof(const Value *V) {
return V->getValueID() == ConstantExprVal;
}
-
+
private:
// Shadow Value::setValueSubclassData with a private forwarding method so that
// subclasses cannot accidentally use it.
@@ -1128,11 +1138,11 @@ public:
/// getSequentialElement - If this Undef has array or vector type, return a
/// undef with the right element type.
UndefValue *getSequentialElement() const;
-
+
/// getStructElement - If this undef has struct type, return a undef with the
/// right element type for the specified element.
UndefValue *getStructElement(unsigned Elt) const;
-
+
/// getElementValue - Return an undef of the right value for the specified GEP
/// index.
UndefValue *getElementValue(Constant *C) const;
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
index 24ad05f17f..e10f9c74c9 100644
--- a/include/llvm/DataLayout.h
+++ b/include/llvm/DataLayout.h
@@ -148,9 +148,9 @@ private:
return &align != &InvalidPointerElem;
}
- /// Initialise a DataLayout object with default values, ensure that the
- /// target data pass is registered.
- void init();
+ /// Parses a target data specification string. Returns an error message
+ /// if the string is malformed, or the empty string on success.
+ std::string parseSpecifier(StringRef LayoutDescription);
public:
/// Default ctor.
@@ -162,17 +162,9 @@ public:
/// Constructs a DataLayout from a specification string. See init().
explicit DataLayout(StringRef LayoutDescription)
: ImmutablePass(ID) {
- std::string errMsg = parseSpecifier(LayoutDescription, this);
- assert(errMsg == "" && "Invalid target data layout string.");
- (void)errMsg;
+ init(LayoutDescription);
}
- /// Parses a target data specification string. Returns an error message
- /// if the string is malformed, or the empty string on success. Optionally
- /// initialises a DataLayout object if passed a non-null pointer.
- static std::string parseSpecifier(StringRef LayoutDescription,
- DataLayout* td = 0);
-
/// Initialize target data from properties stored in the module.
explicit DataLayout(const Module *M);
@@ -187,6 +179,10 @@ public:
~DataLayout(); // Not virtual, do not subclass this class
+ /// Parse a data layout string (with fallback to default values). Ensure that
+ /// the data layout pass is registered.
+ void init(StringRef LayoutDescription);
+
/// Layout endianness...
bool isLittleEndian() const { return LittleEndian; }
bool isBigEndian() const { return !LittleEndian; }
@@ -285,6 +281,7 @@ public:
/// getTypeSizeInBits - Return the number of bits necessary to hold the
/// specified type. For example, returns 36 for i36 and 80 for x86_fp80.
+ /// The type passed must have a size (Type::isSized() must return true).
uint64_t getTypeSizeInBits(Type* Ty) const;
/// getTypeStoreSize - Return the maximum number of bytes that may be
diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h
index dae03ad100..8520ebaba5 100644
--- a/include/llvm/DebugInfo.h
+++ b/include/llvm/DebugInfo.h
@@ -215,9 +215,9 @@ namespace llvm {
}
StringRef getFilename() const { return getStringField(1); }
StringRef getDirectory() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const{
+ DICompileUnit getCompileUnit() const{
assert (getVersion() <= LLVMDebugVersion10 && "Invalid CompileUnit!");
- return getFieldAs<DICompileUnit>(3);
+ return getFieldAs<DICompileUnit>(3);
}
};
@@ -252,11 +252,11 @@ namespace llvm {
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const{
+ DICompileUnit getCompileUnit() const{
assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
if (getVersion() == llvm::LLVMDebugVersion7)
return getFieldAs<DICompileUnit>(3);
-
+
return getFieldAs<DIFile>(3).getCompileUnit();
}
DIFile getFile() const { return getFieldAs<DIFile>(3); }
@@ -298,13 +298,13 @@ namespace llvm {
bool isValid() const {
return DbgNode && (isBasicType() || isDerivedType() || isCompositeType());
}
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
if (getVersion() == llvm::LLVMDebugVersion7)
return getCompileUnit().getDirectory();
return getFieldAs<DIFile>(3).getDirectory();
}
- StringRef getFilename() const {
+ StringRef getFilename() const {
if (getVersion() == llvm::LLVMDebugVersion7)
return getCompileUnit().getFilename();
@@ -349,14 +349,14 @@ namespace llvm {
/// return base type size.
uint64_t getOriginalTypeSize() const;
- /// getObjCProperty - Return property node, if this ivar is
+ /// getObjCProperty - Return property node, if this ivar is
/// associated with one.
MDNode *getObjCProperty() const;
- StringRef getObjCPropertyName() const {
+ StringRef getObjCPropertyName() const {
if (getVersion() > LLVMDebugVersion11)
return StringRef();
- return getStringField(10);
+ return getStringField(10);
}
StringRef getObjCPropertyGetterName() const {
assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
@@ -427,10 +427,10 @@ namespace llvm {
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
DIType getType() const { return getFieldAs<DIType>(3); }
- StringRef getFilename() const {
+ StringRef getFilename() const {
return getFieldAs<DIFile>(4).getFilename();
}
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
return getFieldAs<DIFile>(4).getDirectory();
}
unsigned getLineNumber() const { return getUnsignedField(5); }
@@ -446,10 +446,10 @@ namespace llvm {
StringRef getName() const { return getStringField(2); }
DIType getType() const { return getFieldAs<DIType>(3); }
uint64_t getValue() const { return getUInt64Field(4); }
- StringRef getFilename() const {
+ StringRef getFilename() const {
return getFieldAs<DIFile>(5).getFilename();
}
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
return getFieldAs<DIFile>(5).getDirectory();
}
unsigned getLineNumber() const { return getUnsignedField(6); }
@@ -467,12 +467,12 @@ namespace llvm {
StringRef getName() const { return getStringField(3); }
StringRef getDisplayName() const { return getStringField(4); }
StringRef getLinkageName() const { return getStringField(5); }
- DICompileUnit getCompileUnit() const{
+ DICompileUnit getCompileUnit() const{
assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
if (getVersion() == llvm::LLVMDebugVersion7)
return getFieldAs<DICompileUnit>(6);
- return getFieldAs<DIFile>(6).getCompileUnit();
+ return getFieldAs<DIFile>(6).getCompileUnit();
}
unsigned getLineNumber() const { return getUnsignedField(7); }
DICompositeType getType() const { return getFieldAs<DICompositeType>(8); }
@@ -502,33 +502,33 @@ namespace llvm {
return getFieldAs<DICompositeType>(13);
}
- unsigned isArtificial() const {
+ unsigned isArtificial() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
- return getUnsignedField(14);
+ return getUnsignedField(14);
return (getUnsignedField(14) & FlagArtificial) != 0;
}
/// isPrivate - Return true if this subprogram has "private"
/// access specifier.
- bool isPrivate() const {
+ bool isPrivate() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return false;
return (getUnsignedField(14) & FlagPrivate) != 0;
}
/// isProtected - Return true if this subprogram has "protected"
/// access specifier.
- bool isProtected() const {
+ bool isProtected() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return false;
return (getUnsignedField(14) & FlagProtected) != 0;
}
/// isExplicit - Return true if this subprogram is marked as explicit.
- bool isExplicit() const {
+ bool isExplicit() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return false;
return (getUnsignedField(14) & FlagExplicit) != 0;
}
/// isPrototyped - Return true if this subprogram is prototyped.
- bool isPrototyped() const {
+ bool isPrototyped() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return false;
return (getUnsignedField(14) & FlagPrototyped) != 0;
@@ -536,18 +536,18 @@ namespace llvm {
unsigned isOptimized() const;
- StringRef getFilename() const {
+ StringRef getFilename() const {
if (getVersion() == llvm::LLVMDebugVersion7)
return getCompileUnit().getFilename();
- return getFieldAs<DIFile>(6).getFilename();
+ return getFieldAs<DIFile>(6).getFilename();
}
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
if (getVersion() == llvm::LLVMDebugVersion7)
return getCompileUnit().getFilename();
- return getFieldAs<DIFile>(6).getDirectory();
+ return getFieldAs<DIFile>(6).getDirectory();
}
/// getScopeLineNumber - Get the beginning of the scope of the
@@ -583,25 +583,25 @@ namespace llvm {
StringRef getName() const { return getStringField(3); }
StringRef getDisplayName() const { return getStringField(4); }
StringRef getLinkageName() const { return getStringField(5); }
- DICompileUnit getCompileUnit() const{
+ DICompileUnit getCompileUnit() const{
assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
if (getVersion() == llvm::LLVMDebugVersion7)
return getFieldAs<DICompileUnit>(6);
- DIFile F = getFieldAs<DIFile>(6);
+ DIFile F = getFieldAs<DIFile>(6);
return F.getCompileUnit();
}
StringRef getFilename() const {
if (getVersion() <= llvm::LLVMDebugVersion10)
return getContext().getFilename();
return getFieldAs<DIFile>(6).getFilename();
- }
+ }
StringRef getDirectory() const {
if (getVersion() <= llvm::LLVMDebugVersion10)
return getContext().getDirectory();
return getFieldAs<DIFile>(6).getDirectory();
- }
+ }
unsigned getLineNumber() const { return getUnsignedField(7); }
DIType getType() const { return getFieldAs<DIType>(8); }
@@ -626,25 +626,25 @@ namespace llvm {
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const {
+ DICompileUnit getCompileUnit() const {
assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
if (getVersion() == llvm::LLVMDebugVersion7)
return getFieldAs<DICompileUnit>(3);
- DIFile F = getFieldAs<DIFile>(3);
+ DIFile F = getFieldAs<DIFile>(3);
return F.getCompileUnit();
}
- unsigned getLineNumber() const {
- return (getUnsignedField(4) << 8) >> 8;
+ unsigned getLineNumber() const {
+ return (getUnsignedField(4) << 8) >> 8;
}
unsigned getArgNumber() const {
- unsigned L = getUnsignedField(4);
+ unsigned L = getUnsignedField(4);
return L >> 24;
}
DIType getType() const { return getFieldAs<DIType>(5); }
-
+
/// isArtificial - Return true if this variable is marked as "artificial".
- bool isArtificial() const {
+ bool isArtificial() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return false;
return (getUnsignedField(6) & FlagArtificial) != 0;
@@ -666,7 +666,7 @@ namespace llvm {
}
unsigned getNumAddrElements() const;
-
+
uint64_t getAddrElement(unsigned Idx) const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return getUInt64Field(Idx+6);
@@ -726,23 +726,23 @@ namespace llvm {
};
/// DINameSpace - A wrapper for a C++ style name space.
- class DINameSpace : public DIScope {
+ class DINameSpace : public DIScope {
public:
explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
- StringRef getDirectory() const {
+ StringRef getDirectory() const {
return getFieldAs<DIFile>(3).getDirectory();
}
- StringRef getFilename() const {
+ StringRef getFilename() const {
return getFieldAs<DIFile>(3).getFilename();
}
- DICompileUnit getCompileUnit() const{
+ DICompileUnit getCompileUnit() const{
assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
if (getVersion() == llvm::LLVMDebugVersion7)
return getFieldAs<DICompileUnit>(3);
- return getFieldAs<DIFile>(3).getCompileUnit();
+ return getFieldAs<DIFile>(3).getCompileUnit();
}
unsigned getLineNumber() const { return getUnsignedField(4); }
bool Verify() const;
@@ -818,7 +818,7 @@ namespace llvm {
/// to hold function specific information.
NamedMDNode *getOrInsertFnSpecificMDNode(Module &M, DISubprogram SP);
- /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
+ /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
/// suitable to hold function specific information.
NamedMDNode *getFnSpecificMDNode(const Module &M, DISubprogram SP);
@@ -836,7 +836,7 @@ namespace llvm {
public:
/// processModule - Process entire module and collect debug info
/// anchors.
- void processModule(Module &M);
+ void processModule(const Module &M);
private:
/// processType - Process DIType.
@@ -849,7 +849,7 @@ namespace llvm {
void processSubprogram(DISubprogram SP);
/// processDeclare - Process DbgDeclareInst.
- void processDeclare(DbgDeclareInst *DDI);
+ void processDeclare(const DbgDeclareInst *DDI);
/// processLocation - Process DILocation.
void processLocation(DILocation Loc);
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 26bd1f6275..9bfb19d693 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -19,6 +19,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/RelocVisitor.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -102,14 +104,7 @@ public:
virtual ~DIContext();
/// getDWARFContext - get a context for binary DWARF data.
- static DIContext *getDWARFContext(bool isLittleEndian,
- StringRef infoSection,
- StringRef abbrevSection,
- StringRef aRangeSection = StringRef(),
- StringRef lineSection = StringRef(),
- StringRef stringSection = StringRef(),
- StringRef rangeSection = StringRef(),
- const RelocAddrMap &Map = RelocAddrMap());
+ static DIContext *getDWARFContext(object::ObjectFile *);
virtual void dump(raw_ostream &OS) = 0;
diff --git a/include/llvm/ExecutionEngine/NaClJITMemoryManager.h b/include/llvm/ExecutionEngine/NaClJITMemoryManager.h
index dcd06627df..535d64b133 100644
--- a/include/llvm/ExecutionEngine/NaClJITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/NaClJITMemoryManager.h
@@ -189,7 +189,10 @@ class NaClJITMemoryManager : public JITMemoryManager {
/// the instance class to use if it needs to communicate to the JIT about
/// a given section after the fact.
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
+
+ /// Ignored.
+ virtual bool applyPermissions(std::string *ErrMsg = 0) {}
/// allocateSpace - Allocate a memory block of the given size. This method
/// cannot be called between calls to startFunctionBody and endFunctionBody.
diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h
index ab7f25e9d0..99553a3fd8 100644
--- a/include/llvm/ExecutionEngine/OProfileWrapper.h
+++ b/include/llvm/ExecutionEngine/OProfileWrapper.h
@@ -41,10 +41,10 @@ class OProfileWrapper {
typedef int (*op_unload_native_code_ptr_t)(op_agent_t, uint64_t);
// Also used for op_minor_version function which has the same signature
- typedef int (*op_major_version_ptr_t)(void);
+ typedef int (*op_major_version_ptr_t)();
// This is not a part of the opagent API, but is useful nonetheless
- typedef bool (*IsOProfileRunningPtrT)(void);
+ typedef bool (*IsOProfileRunningPtrT)();
op_agent_t Agent;
@@ -99,8 +99,8 @@ public:
size_t num_entries,
struct debug_line_info const* info);
int op_unload_native_code(uint64_t addr);
- int op_major_version(void);
- int op_minor_version(void);
+ int op_major_version();
+ int op_minor_version();
// Returns true if the oprofiled process is running, the opagent library is
// loaded and a connection to the agent has been established, and false
diff --git a/include/llvm/ExecutionEngine/ObjectBuffer.h b/include/llvm/ExecutionEngine/ObjectBuffer.h
index a0a77b8ba8..3045fbd60d 100644
--- a/include/llvm/ExecutionEngine/ObjectBuffer.h
+++ b/include/llvm/ExecutionEngine/ObjectBuffer.h
@@ -1,80 +1,80 @@
-//===---- ObjectBuffer.h - Utility class to wrap object image memory -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares a wrapper class to hold the memory into which an
-// object will be generated.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
-#define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-namespace llvm {
-
-/// ObjectBuffer - This class acts as a container for the memory buffer used during
-/// generation and loading of executable objects using MCJIT and RuntimeDyld. The
-/// underlying memory for the object will be owned by the ObjectBuffer instance
-/// throughout its lifetime. The getMemBuffer() method provides a way to create a
-/// MemoryBuffer wrapper object instance to be owned by other classes (such as
-/// ObjectFile) as needed, but the MemoryBuffer instance returned does not own the
-/// actual memory it points to.
-class ObjectBuffer {
-public:
- ObjectBuffer() {}
- ObjectBuffer(MemoryBuffer* Buf) : Buffer(Buf) {}
- virtual ~ObjectBuffer() {}
-
- /// getMemBuffer - Like MemoryBuffer::getMemBuffer() this function
- /// returns a pointer to an object that is owned by the caller. However,
- /// the caller does not take ownership of the underlying memory.
- MemoryBuffer *getMemBuffer() const {
- return MemoryBuffer::getMemBuffer(Buffer->getBuffer(), "", false);
- }
-
- const char *getBufferStart() const { return Buffer->getBufferStart(); }
- size_t getBufferSize() const { return Buffer->getBufferSize(); }
-
-protected:
- // The memory contained in an ObjectBuffer
- OwningPtr<MemoryBuffer> Buffer;
-};
-
-/// ObjectBufferStream - This class encapsulates the SmallVector and
-/// raw_svector_ostream needed to generate an object using MC code emission
-/// while providing a common ObjectBuffer interface for access to the
-/// memory once the object has been generated.
-class ObjectBufferStream : public ObjectBuffer {
-public:
- ObjectBufferStream() : OS(SV) {}
- virtual ~ObjectBufferStream() {}
-
- raw_ostream &getOStream() { return OS; }
- void flush()
- {
- OS.flush();
-
- // Make the data accessible via the ObjectBuffer::Buffer
- Buffer.reset(MemoryBuffer::getMemBuffer(StringRef(SV.data(), SV.size()),
- "",
- false));
- }
-
-protected:
- SmallVector<char, 4096> SV; // Working buffer into which we JIT.
- raw_svector_ostream OS; // streaming wrapper
-};
-
-} // namespace llvm
-
-#endif
+//===---- ObjectBuffer.h - Utility class to wrap object image memory -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a wrapper class to hold the memory into which an
+// object will be generated.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
+#define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+/// ObjectBuffer - This class acts as a container for the memory buffer used during
+/// generation and loading of executable objects using MCJIT and RuntimeDyld. The
+/// underlying memory for the object will be owned by the ObjectBuffer instance
+/// throughout its lifetime. The getMemBuffer() method provides a way to create a
+/// MemoryBuffer wrapper object instance to be owned by other classes (such as
+/// ObjectFile) as needed, but the MemoryBuffer instance returned does not own the
+/// actual memory it points to.
+class ObjectBuffer {
+public:
+ ObjectBuffer() {}
+ ObjectBuffer(MemoryBuffer* Buf) : Buffer(Buf) {}
+ virtual ~ObjectBuffer() {}
+
+ /// getMemBuffer - Like MemoryBuffer::getMemBuffer() this function
+ /// returns a pointer to an object that is owned by the caller. However,
+ /// the caller does not take ownership of the underlying memory.
+ MemoryBuffer *getMemBuffer() const {
+ return MemoryBuffer::getMemBuffer(Buffer->getBuffer(), "", false);
+ }
+
+ const char *getBufferStart() const { return Buffer->getBufferStart(); }
+ size_t getBufferSize() const { return Buffer->getBufferSize(); }
+
+protected:
+ // The memory contained in an ObjectBuffer
+ OwningPtr<MemoryBuffer> Buffer;
+};
+
+/// ObjectBufferStream - This class encapsulates the SmallVector and
+/// raw_svector_ostream needed to generate an object using MC code emission
+/// while providing a common ObjectBuffer interface for access to the
+/// memory once the object has been generated.
+class ObjectBufferStream : public ObjectBuffer {
+public:
+ ObjectBufferStream() : OS(SV) {}
+ virtual ~ObjectBufferStream() {}
+
+ raw_ostream &getOStream() { return OS; }
+ void flush()
+ {
+ OS.flush();
+
+ // Make the data accessible via the ObjectBuffer::Buffer
+ Buffer.reset(MemoryBuffer::getMemBuffer(StringRef(SV.data(), SV.size()),
+ "",
+ false));
+ }
+
+protected:
+ SmallVector<char, 4096> SV; // Working buffer into which we JIT.
+ raw_svector_ostream OS; // streaming wrapper
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/ExecutionEngine/ObjectImage.h b/include/llvm/ExecutionEngine/ObjectImage.h
index 82549add62..a92d1d55df 100644
--- a/include/llvm/ExecutionEngine/ObjectImage.h
+++ b/include/llvm/ExecutionEngine/ObjectImage.h
@@ -1,61 +1,61 @@
-//===---- ObjectImage.h - Format independent executuable object image -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares a file format independent ObjectImage class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
-#define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
-
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectBuffer.h"
-
-namespace llvm {
-
-
-/// ObjectImage - A container class that represents an ObjectFile that has been
-/// or is in the process of being loaded into memory for execution.
-class ObjectImage {
- ObjectImage() LLVM_DELETED_FUNCTION;
- ObjectImage(const ObjectImage &other) LLVM_DELETED_FUNCTION;
-
-protected:
- OwningPtr<ObjectBuffer> Buffer;
-
-public:
- ObjectImage(ObjectBuffer *Input) : Buffer(Input) {}
- virtual ~ObjectImage() {}
-
- virtual object::symbol_iterator begin_symbols() const = 0;
- virtual object::symbol_iterator end_symbols() const = 0;
-
- virtual object::section_iterator begin_sections() const = 0;
- virtual object::section_iterator end_sections() const = 0;
-
- virtual /* Triple::ArchType */ unsigned getArch() const = 0;
-
- // Subclasses can override these methods to update the image with loaded
- // addresses for sections and common symbols
- virtual void updateSectionAddress(const object::SectionRef &Sec,
- uint64_t Addr) = 0;
- virtual void updateSymbolAddress(const object::SymbolRef &Sym,
- uint64_t Addr) = 0;
-
- virtual StringRef getData() const = 0;
-
- // Subclasses can override these methods to provide JIT debugging support
- virtual void registerWithDebugger() = 0;
- virtual void deregisterWithDebugger() = 0;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
-
+//===---- ObjectImage.h - Format independent executable object image ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
+#define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+
+namespace llvm {
+
+
+/// ObjectImage - A container class that represents an ObjectFile that has been
+/// or is in the process of being loaded into memory for execution.
+class ObjectImage {
+ ObjectImage() LLVM_DELETED_FUNCTION;
+ ObjectImage(const ObjectImage &other) LLVM_DELETED_FUNCTION;
+
+protected:
+ OwningPtr<ObjectBuffer> Buffer;
+
+public:
+ ObjectImage(ObjectBuffer *Input) : Buffer(Input) {} // Input is stored in the OwningPtr member Buffer.
+ virtual ~ObjectImage() {}
+
+ virtual object::symbol_iterator begin_symbols() const = 0;
+ virtual object::symbol_iterator end_symbols() const = 0;
+
+ virtual object::section_iterator begin_sections() const = 0;
+ virtual object::section_iterator end_sections() const = 0;
+
+ virtual /* Triple::ArchType */ unsigned getArch() const = 0;
+
+ // Subclasses must implement these methods (they are pure virtual) to update
+ // the image with loaded addresses for sections and common symbols.
+ virtual void updateSectionAddress(const object::SectionRef &Sec,
+ uint64_t Addr) = 0;
+ virtual void updateSymbolAddress(const object::SymbolRef &Sym,
+ uint64_t Addr) = 0;
+
+ virtual StringRef getData() const = 0;
+
+ // Subclasses must implement these methods (pure virtual) to provide JIT debugging support.
+ virtual void registerWithDebugger() = 0;
+ virtual void deregisterWithDebugger() = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
+
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 891f534862..c3d160f012 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -48,7 +48,7 @@ public:
/// assigned by the JIT engine, and optionally recorded by the memory manager
/// to access a loaded section.
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) = 0;
+ unsigned SectionID, bool IsReadOnly) = 0;
/// getPointerToNamedFunction - This method returns the address of the
/// specified function. As such it is only useful for resolving library
@@ -59,6 +59,15 @@ public:
/// message to stderr and aborts.
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) = 0;
+
+ /// applyPermissions - This method is called when object loading is
+ /// complete and section page permissions can be applied. It is up to
+ /// the memory manager implementation to decide whether or not to act
+ /// on this method. The memory manager will typically allocate all
+ /// sections as read-write and then apply specific permissions when
+ /// this method is called. Returns true if an error occurred, false
+ /// otherwise.
+ virtual bool applyPermissions(std::string *ErrMsg = 0) = 0;
};
class RuntimeDyld {
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 8aa8a56bf8..0768df1684 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -33,7 +33,7 @@ class Instruction : public User, public ilist_node<Instruction> {
BasicBlock *Parent;
DebugLoc DbgLoc; // 'dbg' Metadata cache.
-
+
enum {
/// HasMetadataBit - This is a bit stored in the SubClassData field which
/// indicates whether this instruction has metadata attached to it or not.
@@ -42,12 +42,12 @@ class Instruction : public User, public ilist_node<Instruction> {
public:
// Out of line virtual method, so the vtable, etc has a home.
~Instruction();
-
+
/// use_back - Specialize the methods defined in Value, as we know that an
/// instruction can only be used by other instructions.
Instruction *use_back() { return cast<Instruction>(*use_begin());}
const Instruction *use_back() const { return cast<Instruction>(*use_begin());}
-
+
inline const BasicBlock *getParent() const { return Parent; }
inline BasicBlock *getParent() { return Parent; }
@@ -77,16 +77,16 @@ public:
//===--------------------------------------------------------------------===//
// Subclass classification.
//===--------------------------------------------------------------------===//
-
+
/// getOpcode() returns a member of one of the enums like Instruction::Add.
unsigned getOpcode() const { return getValueID() - InstructionVal; }
-
+
const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
bool isTerminator() const { return isTerminator(getOpcode()); }
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
bool isShift() { return isShift(getOpcode()); }
bool isCast() const { return isCast(getOpcode()); }
-
+
static const char* getOpcodeName(unsigned OpCode);
static inline bool isTerminator(unsigned OpCode) {
@@ -121,33 +121,33 @@ public:
//===--------------------------------------------------------------------===//
// Metadata manipulation.
//===--------------------------------------------------------------------===//
-
+
/// hasMetadata() - Return true if this instruction has any metadata attached
/// to it.
bool hasMetadata() const {
return !DbgLoc.isUnknown() || hasMetadataHashEntry();
}
-
+
/// hasMetadataOtherThanDebugLoc - Return true if this instruction has
/// metadata attached to it other than a debug location.
bool hasMetadataOtherThanDebugLoc() const {
return hasMetadataHashEntry();
}
-
+
/// getMetadata - Get the metadata of given kind attached to this Instruction.
/// If the metadata is not found then return null.
MDNode *getMetadata(unsigned KindID) const {
if (!hasMetadata()) return 0;
return getMetadataImpl(KindID);
}
-
+
/// getMetadata - Get the metadata of given kind attached to this Instruction.
/// If the metadata is not found then return null.
MDNode *getMetadata(StringRef Kind) const {
if (!hasMetadata()) return 0;
return getMetadataImpl(Kind);
}
-
+
/// getAllMetadata - Get all metadata attached to this Instruction. The first
/// element of each pair returned is the KindID, the second element is the
/// metadata value. This list is returned sorted by the KindID.
@@ -155,7 +155,7 @@ public:
if (hasMetadata())
getAllMetadataImpl(MDs);
}
-
+
/// getAllMetadataOtherThanDebugLoc - This does the same thing as
/// getAllMetadata, except that it filters out the debug location.
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl<std::pair<unsigned,
@@ -163,7 +163,7 @@ public:
if (hasMetadataOtherThanDebugLoc())
getAllMetadataOtherThanDebugLocImpl(MDs);
}
-
+
/// setMetadata - Set the metadata of the specified kind to the specified
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
@@ -172,17 +172,17 @@ public:
/// setDebugLoc - Set the debug location information for this instruction.
void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
-
+
/// getDebugLoc - Return the debug location for this node as a DebugLoc.
const DebugLoc &getDebugLoc() const { return DbgLoc; }
-
+
private:
/// hasMetadataHashEntry - Return true if we have an entry in the on-the-side
/// metadata hash.
bool hasMetadataHashEntry() const {
return (getSubclassDataFromValue() & HasMetadataBit) != 0;
}
-
+
// These are all implemented in Metadata.cpp.
MDNode *getMetadataImpl(unsigned KindID) const;
MDNode *getMetadataImpl(StringRef Kind) const;
@@ -194,8 +194,8 @@ public:
//===--------------------------------------------------------------------===//
// Predicates and helper methods.
//===--------------------------------------------------------------------===//
-
-
+
+
/// isAssociative - Return true if the instruction is associative:
///
/// Associative operators satisfy: x op (y op z) === (x op y) op z
@@ -271,12 +271,12 @@ public:
/// * The instruction has no name
///
Instruction *clone() const;
-
+
/// isIdenticalTo - Return true if the specified instruction is exactly
/// identical to the current one. This means that all operands match and any
/// extra information (e.g. load is volatile) agree.
bool isIdenticalTo(const Instruction *I) const;
-
+
/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
/// ignores the SubclassOptionalData flags, which specify conditions
/// under which the instruction's result is undefined.
@@ -291,7 +291,7 @@ public:
/// as equivalent.
CompareUsingScalarTypes = 1<<1
};
-
+
/// This function determines if the specified instruction executes the same
/// operation as the current one. This means that the opcodes, type, operand
/// types and any other factors affecting the operation must be the same. This
@@ -301,14 +301,14 @@ public:
/// the current one.
/// @brief Determine if one instruction is the same operation as another.
bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const;
-
+
/// isUsedOutsideOfBlock - Return true if there are any uses of this
/// instruction in blocks other than the specified block. Note that PHI nodes
/// are considered to evaluate their operands in the corresponding predecessor
/// block.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const;
-
-
+
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Value *V) {
return V->getValueID() >= Value::InstructionVal;
@@ -360,34 +360,34 @@ private:
unsigned short getSubclassDataFromValue() const {
return Value::getSubclassDataFromValue();
}
-
+
void setHasMetadataHashEntry(bool V) {
setValueSubclassData((getSubclassDataFromValue() & ~HasMetadataBit) |
(V ? HasMetadataBit : 0));
}
-
+
friend class SymbolTableListTraits<Instruction, BasicBlock>;
void setParent(BasicBlock *P);
protected:
// Instruction subclasses can stick up to 15 bits of stuff into the
// SubclassData field of instruction with these members.
-
+
// Verify that only the low 15 bits are used.
void setInstructionSubclassData(unsigned short D) {
assert((D & HasMetadataBit) == 0 && "Out of range value put into field");
setValueSubclassData((getSubclassDataFromValue() & HasMetadataBit) | D);
}
-
+
unsigned getSubclassDataFromInstruction() const {
return getSubclassDataFromValue() & ~HasMetadataBit;
}
-
+
Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
Instruction *InsertBefore = 0);
Instruction(Type *Ty, unsigned iType, Use *Ops, unsigned NumOps,
BasicBlock *InsertAtEnd);
virtual Instruction *clone_impl() const = 0;
-
+
};
// Instruction* is only 4-byte aligned.
@@ -401,7 +401,7 @@ public:
}
enum { NumLowBitsAvailable = 2 };
};
-
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 42b9da6914..0e27324ad3 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -271,6 +271,10 @@ let Properties = [IntrReadMem] in {
def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
}
let Properties = [IntrNoMem] in {
@@ -494,7 +498,6 @@ def int_nacl_target_arch : Intrinsic<[llvm_i32_ty], []>,
include "llvm/IntrinsicsPowerPC.td"
include "llvm/IntrinsicsX86.td"
include "llvm/IntrinsicsARM.td"
-include "llvm/IntrinsicsCellSPU.td"
include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsHexagon.td"
include "llvm/IntrinsicsNVVM.td"
diff --git a/include/llvm/IntrinsicsCellSPU.td b/include/llvm/IntrinsicsCellSPU.td
deleted file mode 100644
index 1e311bbecb..0000000000
--- a/include/llvm/IntrinsicsCellSPU.td
+++ /dev/null
@@ -1,242 +0,0 @@
-//==- IntrinsicsCellSPU.td - Cell SDK intrinsics -*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// Department at The Aerospace Corporation and is distributed under the
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instructions:
-//===----------------------------------------------------------------------===//
-// TODO Items (not urgent today, but would be nice, low priority)
-//
-// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
-// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
-// in 16-bit and 32-bit constants and reduce instruction count.
-//===----------------------------------------------------------------------===//
-
-// 7-bit integer type, used as an immediate:
-def cell_i7_ty: LLVMType<i8>;
-def cell_i8_ty: LLVMType<i8>;
-
-// Keep this here until it's actually supported:
-def llvm_i128_ty : LLVMType<i128>;
-
-class v16i8_u7imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, cell_i7_ty],
- [IntrNoMem]>;
-
-class v16i8_u8imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
-class v16i8_s10imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v16i8_u16imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v16i8_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
-
-class v8i16_s10imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v8i16_u16imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v8i16_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
-class v4i32_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
-
-class v4i32_u7imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, cell_i7_ty],
- [IntrNoMem]>;
-
-class v4i32_s10imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v4i32_u16imm<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
-class v4f32_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
-
-class v4f32_rrr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
- [IntrNoMem]>;
-
-class v2f64_rr<string builtin_suffix> :
- GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
- [IntrNoMem]>;
-
-// All Cell SPU intrinsics start with "llvm.spu.".
-let TargetPrefix = "spu" in {
- def int_spu_si_fsmbi : v8i16_u16imm<"fsmbi">;
- def int_spu_si_ah : v8i16_rr<"ah">;
- def int_spu_si_ahi : v8i16_s10imm<"ahi">;
- def int_spu_si_a : v4i32_rr<"a">;
- def int_spu_si_ai : v4i32_s10imm<"ai">;
- def int_spu_si_sfh : v8i16_rr<"sfh">;
- def int_spu_si_sfhi : v8i16_s10imm<"sfhi">;
- def int_spu_si_sf : v4i32_rr<"sf">;
- def int_spu_si_sfi : v4i32_s10imm<"sfi">;
- def int_spu_si_addx : v4i32_rr<"addx">;
- def int_spu_si_cg : v4i32_rr<"cg">;
- def int_spu_si_cgx : v4i32_rr<"cgx">;
- def int_spu_si_sfx : v4i32_rr<"sfx">;
- def int_spu_si_bg : v4i32_rr<"bg">;
- def int_spu_si_bgx : v4i32_rr<"bgx">;
- def int_spu_si_mpy : // This is special:
- GCCBuiltin<"__builtin_si_mpy">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyu : // This is special:
- GCCBuiltin<"__builtin_si_mpyu">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyi : // This is special:
- GCCBuiltin<"__builtin_si_mpyi">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyui : // This is special:
- GCCBuiltin<"__builtin_si_mpyui">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpya : // This is special:
- GCCBuiltin<"__builtin_si_mpya">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyh : // This is special:
- GCCBuiltin<"__builtin_si_mpyh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpys : // This is special:
- GCCBuiltin<"__builtin_si_mpys">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhh : // This is special:
- GCCBuiltin<"__builtin_si_mpyhh">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhha : // This is special:
- GCCBuiltin<"__builtin_si_mpyhha">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhhu : // This is special:
- GCCBuiltin<"__builtin_si_mpyhhu">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_spu_si_mpyhhau : // This is special:
- GCCBuiltin<"__builtin_si_mpyhhau">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
-
- def int_spu_si_shli: v4i32_u7imm<"shli">;
-
- def int_spu_si_shlqbi:
- GCCBuiltin<!strconcat("__builtin_si_", "shlqbi")>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_spu_si_shlqbii: v16i8_u7imm<"shlqbii">;
- def int_spu_si_shlqby:
- GCCBuiltin<!strconcat("__builtin_si_", "shlqby")>,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
- def int_spu_si_shlqbyi: v16i8_u7imm<"shlqbyi">;
-
- def int_spu_si_ceq: v4i32_rr<"ceq">;
- def int_spu_si_ceqi: v4i32_s10imm<"ceqi">;
- def int_spu_si_ceqb: v16i8_rr<"ceqb">;
- def int_spu_si_ceqbi: v16i8_u8imm<"ceqbi">;
- def int_spu_si_ceqh: v8i16_rr<"ceqh">;
- def int_spu_si_ceqhi: v8i16_s10imm<"ceqhi">;
- def int_spu_si_cgt: v4i32_rr<"cgt">;
- def int_spu_si_cgti: v4i32_s10imm<"cgti">;
- def int_spu_si_cgtb: v16i8_rr<"cgtb">;
- def int_spu_si_cgtbi: v16i8_u8imm<"cgtbi">;
- def int_spu_si_cgth: v8i16_rr<"cgth">;
- def int_spu_si_cgthi: v8i16_s10imm<"cgthi">;
- def int_spu_si_clgtb: v16i8_rr<"clgtb">;
- def int_spu_si_clgtbi: v16i8_u8imm<"clgtbi">;
- def int_spu_si_clgth: v8i16_rr<"clgth">;
- def int_spu_si_clgthi: v8i16_s10imm<"clgthi">;
- def int_spu_si_clgt: v4i32_rr<"clgt">;
- def int_spu_si_clgti: v4i32_s10imm<"clgti">;
-
- def int_spu_si_and: v4i32_rr<"and">;
- def int_spu_si_andbi: v16i8_u8imm<"andbi">;
- def int_spu_si_andc: v4i32_rr<"andc">;
- def int_spu_si_andhi: v8i16_s10imm<"andhi">;
- def int_spu_si_andi: v4i32_s10imm<"andi">;
-
- def int_spu_si_or: v4i32_rr<"or">;
- def int_spu_si_orbi: v16i8_u8imm<"orbi">;
- def int_spu_si_orc: v4i32_rr<"orc">;
- def int_spu_si_orhi: v8i16_s10imm<"orhi">;
- def int_spu_si_ori: v4i32_s10imm<"ori">;
-
- def int_spu_si_xor: v4i32_rr<"xor">;
- def int_spu_si_xorbi: v16i8_u8imm<"xorbi">;
- def int_spu_si_xorhi: v8i16_s10imm<"xorhi">;
- def int_spu_si_xori: v4i32_s10imm<"xori">;
-
- def int_spu_si_nor: v4i32_rr<"nor">;
- def int_spu_si_nand: v4i32_rr<"nand">;
-
- def int_spu_si_fa: v4f32_rr<"fa">;
- def int_spu_si_fs: v4f32_rr<"fs">;
- def int_spu_si_fm: v4f32_rr<"fm">;
-
- def int_spu_si_fceq: v4f32_rr<"fceq">;
- def int_spu_si_fcmeq: v4f32_rr<"fcmeq">;
- def int_spu_si_fcgt: v4f32_rr<"fcgt">;
- def int_spu_si_fcmgt: v4f32_rr<"fcmgt">;
-
- def int_spu_si_fma: v4f32_rrr<"fma">;
- def int_spu_si_fnms: v4f32_rrr<"fnms">;
- def int_spu_si_fms: v4f32_rrr<"fms">;
-
- def int_spu_si_dfa: v2f64_rr<"dfa">;
- def int_spu_si_dfs: v2f64_rr<"dfs">;
- def int_spu_si_dfm: v2f64_rr<"dfm">;
-
-//def int_spu_si_dfceq: v2f64_rr<"dfceq">;
-//def int_spu_si_dfcmeq: v2f64_rr<"dfcmeq">;
-//def int_spu_si_dfcgt: v2f64_rr<"dfcgt">;
-//def int_spu_si_dfcmgt: v2f64_rr<"dfcmgt">;
-
- def int_spu_si_dfnma: v2f64_rr<"dfnma">;
- def int_spu_si_dfma: v2f64_rr<"dfma">;
- def int_spu_si_dfnms: v2f64_rr<"dfnms">;
- def int_spu_si_dfms: v2f64_rr<"dfms">;
-}
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 8fc437f3e6..92e76121c0 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -16,7 +16,6 @@
#define LLVM_MC_MCDWARF_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Compiler.h"
@@ -266,42 +265,115 @@ namespace llvm {
class MCCFIInstruction {
public:
- enum OpType { SameValue, RememberState, RestoreState, Move, RelMove, Escape,
- Restore};
+ enum OpType { OpSameValue, OpRememberState, OpRestoreState, OpOffset,
+ OpDefCfaRegister, OpDefCfaOffset, OpDefCfa, OpRelOffset,
+ OpAdjustCfaOffset, OpEscape, OpRestore, OpUndefined,
+ OpRegister };
private:
OpType Operation;
MCSymbol *Label;
- // Move to & from location.
- MachineLocation Destination;
- MachineLocation Source;
+ unsigned Register;
+ union {
+ int Offset;
+ unsigned Register2;
+ };
std::vector<char> Values;
+
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V) :
+ Operation(Op), Label(L), Register(R), Offset(O),
+ Values(V.begin(), V.end()) {
+ assert(Op != OpRegister);
+ }
+
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2) :
+ Operation(Op), Label(L), Register(R1), Register2(R2) {
+ assert(Op == OpRegister);
+ }
+
public:
- MCCFIInstruction(OpType Op, MCSymbol *L)
- : Operation(Op), Label(L) {
- assert(Op == RememberState || Op == RestoreState);
+ static MCCFIInstruction
+ createOffset(MCSymbol *L, unsigned Register, int Offset) {
+ return MCCFIInstruction(OpOffset, L, Register, Offset, "");
}
- MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register)
- : Operation(Op), Label(L), Destination(Register) {
- assert(Op == SameValue || Op == Restore);
+
+ static MCCFIInstruction
+ createDefCfaRegister(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpDefCfaRegister, L, Register, 0, "");
}
- MCCFIInstruction(MCSymbol *L, const MachineLocation &D,
- const MachineLocation &S)
- : Operation(Move), Label(L), Destination(D), Source(S) {
+
+ static MCCFIInstruction createDefCfaOffset(MCSymbol *L, int Offset) {
+ return MCCFIInstruction(OpDefCfaOffset, L, 0, -Offset, "");
}
- MCCFIInstruction(OpType Op, MCSymbol *L, const MachineLocation &D,
- const MachineLocation &S)
- : Operation(Op), Label(L), Destination(D), Source(S) {
- assert(Op == RelMove);
+
+ static MCCFIInstruction
+ createDefCfa(MCSymbol *L, unsigned Register, int Offset) {
+ return MCCFIInstruction(OpDefCfa, L, Register, -Offset, "");
}
- MCCFIInstruction(OpType Op, MCSymbol *L, StringRef Vals)
- : Operation(Op), Label(L), Values(Vals.begin(), Vals.end()) {
- assert(Op == Escape);
+
+ static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpUndefined, L, Register, 0, "");
+ }
+
+ static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpRestore, L, Register, 0, "");
+ }
+
+ static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register) {
+ return MCCFIInstruction(OpSameValue, L, Register, 0, "");
+ }
+
+ static MCCFIInstruction createRestoreState(MCSymbol *L) {
+ return MCCFIInstruction(OpRestoreState, L, 0, 0, "");
+ }
+
+ static MCCFIInstruction createRememberState(MCSymbol *L) {
+ return MCCFIInstruction(OpRememberState, L, 0, 0, "");
}
+
+ static MCCFIInstruction
+ createRelOffset(MCSymbol *L, unsigned Register, int Offset) {
+ return MCCFIInstruction(OpRelOffset, L, Register, Offset, "");
+ }
+
+ static MCCFIInstruction
+ createAdjustCfaOffset(MCSymbol *L, int Adjustment) {
+ return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, "");
+ }
+
+ static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals) {
+ return MCCFIInstruction(OpEscape, L, 0, 0, Vals);
+ }
+
+ static MCCFIInstruction
+ createRegister(MCSymbol *L, unsigned Register1, unsigned Register2) {
+ return MCCFIInstruction(OpRegister, L, Register1, Register2);
+ }
+
OpType getOperation() const { return Operation; }
MCSymbol *getLabel() const { return Label; }
- const MachineLocation &getDestination() const { return Destination; }
- const MachineLocation &getSource() const { return Source; }
+
+ unsigned getRegister() const {
+ assert(Operation == OpDefCfa || Operation == OpOffset ||
+ Operation == OpRestore || Operation == OpUndefined ||
+ Operation == OpSameValue || Operation == OpDefCfaRegister ||
+ Operation == OpRelOffset || Operation == OpRegister);
+ return Register;
+ }
+
+ unsigned getRegister2() const {
+ assert(Operation == OpRegister);
+ return Register2;
+ }
+
+ int getOffset() const {
+ assert(Operation == OpDefCfa || Operation == OpOffset ||
+ Operation == OpRelOffset || Operation == OpDefCfaOffset ||
+ Operation == OpAdjustCfaOffset);
+ return Offset;
+ }
+
const StringRef getValues() const {
+ assert(Operation == OpEscape);
return StringRef(&Values[0], Values.size());
}
};
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 00eef270d6..1007aa5264 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -197,7 +197,11 @@ public:
VK_Mips_GOT_PAGE,
VK_Mips_GOT_OFST,
VK_Mips_HIGHER,
- VK_Mips_HIGHEST
+ VK_Mips_HIGHEST,
+ VK_Mips_GOT_HI16,
+ VK_Mips_GOT_LO16,
+ VK_Mips_CALL_HI16,
+ VK_Mips_CALL_LO16
};
private:
diff --git a/include/llvm/MC/MCInstBuilder.h b/include/llvm/MC/MCInstBuilder.h
new file mode 100644
index 0000000000..c5acb26eec
--- /dev/null
+++ b/include/llvm/MC/MCInstBuilder.h
@@ -0,0 +1,68 @@
+//===-- llvm/MC/MCInstBuilder.h - Simplify creation of MCInsts --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MCInstBuilder class for convenient creation of
+// MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINSTBUILDER_H
+#define LLVM_MC_MCINSTBUILDER_H
+
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+
+class MCInstBuilder { // Builds a single MCInst; each add*() appends one operand and returns *this for chaining.
+ MCInst Inst; // The instruction being assembled.
+
+public:
+ /// \brief Create a new MCInstBuilder for an MCInst with a specific opcode.
+ MCInstBuilder(unsigned Opcode) {
+ Inst.setOpcode(Opcode);
+ }
+
+ /// \brief Add a new register operand.
+ MCInstBuilder &addReg(unsigned Reg) {
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return *this;
+ }
+
+ /// \brief Add a new integer immediate operand.
+ MCInstBuilder &addImm(int64_t Val) {
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return *this;
+ }
+
+ /// \brief Add a new floating point immediate operand.
+ MCInstBuilder &addFPImm(double Val) {
+ Inst.addOperand(MCOperand::CreateFPImm(Val));
+ return *this;
+ }
+
+ /// \brief Add a new MCExpr operand.
+ MCInstBuilder &addExpr(const MCExpr *Val) {
+ Inst.addOperand(MCOperand::CreateExpr(Val));
+ return *this;
+ }
+
+ /// \brief Add a new MCInst operand.
+ MCInstBuilder &addInst(const MCInst *Val) {
+ Inst.addOperand(MCOperand::CreateInst(Val));
+ return *this;
+ }
+
+ operator MCInst&() { // Implicit conversion to the built MCInst; the reference is to the member Inst, so it is valid only while this builder is alive.
+ return Inst;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 40f83bf5d5..10cb5580fe 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -55,6 +55,7 @@ namespace llvm {
std::vector<MCDwarfFrameInfo> FrameInfos;
MCDwarfFrameInfo *getCurrentFrameInfo();
+ MCSymbol *EmitCFICommon();
void EnsureValidFrame();
std::vector<MCWin64EHUnwindInfo *> W64UnwindInfos;
@@ -536,6 +537,8 @@ namespace llvm {
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
virtual void EmitCFIEscape(StringRef Values);
virtual void EmitCFISignalFrame();
+ virtual void EmitCFIUndefined(int64_t Register);
+ virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index f3d824960c..9cd587195a 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -122,6 +122,16 @@ public:
Archive(MemoryBuffer *source, error_code &ec);
+ enum Kind {
+ K_GNU,
+ K_BSD,
+ K_COFF
+ };
+
+ Kind kind() const {
+ return Format;
+ }
+
child_iterator begin_children(bool skip_internal = true) const;
child_iterator end_children() const;
@@ -133,9 +143,13 @@ public:
return v->isArchive();
}
+ // check if a symbol is in the archive
+ child_iterator findSym(StringRef name) const;
+
private:
child_iterator SymbolTable;
child_iterator StringTable;
+ Kind Format;
};
}
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 466de93a78..a2723c77e9 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -609,6 +609,7 @@ public:
const Elf_Dyn *getDyn(DataRefImpl DynData) const;
error_code getSymbolVersion(SymbolRef Symb, StringRef &Version,
bool &IsDefault) const;
+ uint64_t getSymbolIndex(const Elf_Sym *sym) const;
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
@@ -2094,6 +2095,21 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
}
}
+// Get the symbol table index in the symtab section given a symbol
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolIndex(const Elf_Sym *Sym) const {
+ assert(SymbolTableSections.size() == 1 && "Only one symbol table supported!");
+ const Elf_Shdr *SymTab = *SymbolTableSections.begin();
+ uintptr_t SymLoc = uintptr_t(Sym);
+ uintptr_t SymTabLoc = uintptr_t(base() + SymTab->sh_offset);
+ assert(SymLoc > SymTabLoc && "Symbol not in symbol table!");
+ uint64_t SymOffset = SymLoc - SymTabLoc;
+ assert(SymOffset % SymTab->sh_entsize == 0 &&
+ "Symbol not multiple of symbol size!");
+ return SymOffset / SymTab->sh_entsize;
+}
+
template<support::endianness target_endianness, bool is64Bits>
symbol_iterator ELFObjectFile<target_endianness, is64Bits>
::begin_symbols() const {
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index 7668bdedb7..1370c71a7d 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -76,25 +76,6 @@ private:
/// Operations
- // Width is the width in bytes of the extend.
- RelocToApply zeroExtend(RelocToApply r, char Width) {
- if (Width == r.Width)
- return r;
- r.Value &= (1 << ((Width * 8))) - 1;
- return r;
- }
- RelocToApply signExtend(RelocToApply r, char Width) {
- if (Width == r.Width)
- return r;
- bool SignBit = r.Value & (1 << ((Width * 8) - 1));
- if (SignBit) {
- r.Value |= ~((1 << (Width * 8)) - 1);
- } else {
- r.Value &= (1 << (Width * 8)) - 1;
- }
- return r;
- }
-
/// X86-64 ELF
RelocToApply visitELF_X86_64_NONE(RelocationRef R) {
return RelocToApply(0, 0);
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index b326c11352..4f89377a89 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -131,21 +131,21 @@ public:
enum {
IsExact = (1 << 0)
};
-
+
private:
friend class BinaryOperator;
friend class ConstantExpr;
void setIsExact(bool B) {
SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
}
-
+
public:
/// isExact - Test whether this division is known to be exact, with
/// zero remainder.
bool isExact() const {
return SubclassOptionalData & IsExact;
}
-
+
static bool isPossiblyExactOpcode(unsigned OpC) {
return OpC == Instruction::SDiv ||
OpC == Instruction::UDiv ||
@@ -182,7 +182,7 @@ public:
}
};
-
+
/// ConcreteOperator - A helper template for defining operators for individual
/// opcodes.
template<typename SuperClass, unsigned Opc>
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index cd651db1f1..7b6f169666 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -227,10 +227,20 @@ public:
/// createPrinterPass - Get a module printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+ /// doInitialization - Virtual method overridden by subclasses to do
+ /// any necessary initialization.
+ ///
+ virtual bool doInitialization() { return false; }
+
/// runOnModule - Virtual method overriden by subclasses to process the module
/// being operated on.
virtual bool runOnModule(Module &M) = 0;
+ /// doFinalization - Virtual method overridden by subclasses to do any post
+ /// processing needed after all passes have run.
+ ///
+ virtual bool doFinalization() { return false; }
+
virtual void assignPassManager(PMStack &PMS,
PassManagerType T);
diff --git a/include/llvm/PassManager.h b/include/llvm/PassManager.h
index ce5fda79f9..1d5e800b4d 100644
--- a/include/llvm/PassManager.h
+++ b/include/llvm/PassManager.h
@@ -58,6 +58,14 @@ public:
/// whether any of the passes modifies the module, and if so, return true.
bool run(Module &M);
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
private:
/// PassManagerImpl_New is the actual class. PassManager is just the
/// wraper to publish simple pass manager interface
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index 0af5853380..b0450f3e00 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -168,7 +168,7 @@ class PMTopLevelManager {
protected:
explicit PMTopLevelManager(PMDataManager *PMDM);
- virtual unsigned getNumContainedManagers() const {
+ unsigned getNumContainedManagers() const {
return (unsigned)PassManagers.size();
}
@@ -343,7 +343,7 @@ public:
void dumpRequiredSet(const Pass *P) const;
void dumpPreservedSet(const Pass *P) const;
- virtual unsigned getNumContainedPasses() const {
+ unsigned getNumContainedPasses() const {
return (unsigned)PassVector.size();
}
@@ -420,10 +420,20 @@ public:
/// cleanup - After running all passes, clean up pass manager cache.
void cleanup();
+ /// doInitialization - Keep ModulePass::doInitialization(), used for global
+ /// initialization tasks, visible next to the Module & overload below.
+ ///
+ using ModulePass::doInitialization;
+
/// doInitialization - Run all of the initializers for the function passes.
///
bool doInitialization(Module &M);
+ /// doFinalization - Keep ModulePass::doFinalization(), used for global
+ /// finalization tasks, visible next to the Module & overload below.
+ ///
+ using ModulePass::doFinalization;
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization(Module &M);
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 7ceeb32121..5acc7160ab 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -143,8 +143,8 @@
#define TEMPLATE_INSTANTIATION(X)
#endif
-// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
-// mark a method "not for inlining".
+/// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
+/// mark a method "not for inlining".
#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
@@ -153,10 +153,10 @@
#define LLVM_ATTRIBUTE_NOINLINE
#endif
-// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
-// so, mark a method "always inline" because it is performance sensitive. GCC
-// 3.4 supported this but is buggy in various cases and produces unimplemented
-// errors, just use it in GCC 4.0 and later.
+/// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
+/// so, mark a method "always inline" because it is performance sensitive. GCC
+/// 3.4 supported this but is buggy in various cases and produces unimplemented
+/// errors, just use it in GCC 4.0 and later.
#if __GNUC__ > 3
#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
@@ -174,8 +174,8 @@
#define LLVM_ATTRIBUTE_NORETURN
#endif
-// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
-// pedantic diagnostics.
+/// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
+/// pedantic diagnostics.
#ifdef __GNUC__
#define LLVM_EXTENSION __extension__
#else
@@ -197,16 +197,16 @@
decl
#endif
-// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
-// to an expression which states that it is undefined behavior for the
-// compiler to reach this point. Otherwise is not defined.
+/// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
+/// to an expression which states that it is undefined behavior for the
+/// compiler to reach this point. Otherwise is not defined.
#if defined(__clang__) || (__GNUC__ > 4) \
|| (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
#endif
-// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
-// which causes the program to exit abnormally.
+/// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
+/// which causes the program to exit abnormally.
#if defined(__clang__) || (__GNUC__ > 4) \
|| (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
# define LLVM_BUILTIN_TRAP __builtin_trap()
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 8f18a991a9..697ef45634 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -231,6 +231,10 @@ enum dwarf_constants {
DW_AT_const_expr = 0x6c,
DW_AT_enum_class = 0x6d,
DW_AT_linkage_name = 0x6e,
+
+ DW_AT_lo_user = 0x2000,
+ DW_AT_hi_user = 0x3fff,
+
DW_AT_MIPS_loop_begin = 0x2002,
DW_AT_MIPS_tail_loop_begin = 0x2003,
DW_AT_MIPS_epilog_begin = 0x2004,
@@ -246,6 +250,12 @@ enum dwarf_constants {
DW_AT_MIPS_ptr_dopetype = 0x200e,
DW_AT_MIPS_allocatable_dopetype = 0x200f,
DW_AT_MIPS_assumed_shape_dopetype = 0x2010,
+
+ // This one appears to have only been implemented by Open64 for
+ // fortran and may conflict with other extensions.
+ DW_AT_MIPS_assumed_size = 0x2011,
+
+ // GNU extensions
DW_AT_sf_names = 0x2101,
DW_AT_src_info = 0x2102,
DW_AT_mac_info = 0x2103,
@@ -254,9 +264,14 @@ enum dwarf_constants {
DW_AT_body_end = 0x2106,
DW_AT_GNU_vector = 0x2107,
DW_AT_GNU_template_name = 0x2110,
- DW_AT_MIPS_assumed_size = 0x2011,
- DW_AT_lo_user = 0x2000,
- DW_AT_hi_user = 0x3fff,
+
+ // Extensions for Fission proposal.
+ DW_AT_GNU_dwo_name = 0x2130,
+ DW_AT_GNU_dwo_id = 0x2131,
+ DW_AT_GNU_ranges_base = 0x2132,
+ DW_AT_GNU_addr_base = 0x2133,
+ DW_AT_GNU_pubnames = 0x2134,
+ DW_AT_GNU_pubtypes = 0x2135,
// Apple extensions.
DW_AT_APPLE_optimized = 0x3fe1,
@@ -300,6 +315,10 @@ enum dwarf_constants {
DW_FORM_flag_present = 0x19,
DW_FORM_ref_sig8 = 0x20,
+ // Extensions for Fission proposal
+ DW_FORM_GNU_addr_index = 0x1f01,
+ DW_FORM_GNU_str_index = 0x1f02,
+
// Operation encodings
DW_OP_addr = 0x03,
DW_OP_deref = 0x06,
@@ -458,6 +477,10 @@ enum dwarf_constants {
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
+ // Extensions for Fission proposal.
+ DW_OP_GNU_addr_index = 0xfb,
+ DW_OP_GNU_const_index = 0xfc,
+
// Encoding attribute values
DW_ATE_address = 0x01,
DW_ATE_boolean = 0x02,
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index a67a6ac09e..0b60150168 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -458,7 +458,9 @@ enum {
R_PPC_REL14 = 11,
R_PPC_REL14_BRTAKEN = 12,
R_PPC_REL14_BRNTAKEN = 13,
- R_PPC_REL32 = 26
+ R_PPC_REL32 = 26,
+ R_PPC_TPREL16_LO = 70,
+ R_PPC_TPREL16_HA = 72
};
// ELF Relocation types for PPC64
diff --git a/include/llvm/Support/GetElementPtrTypeIterator.h b/include/llvm/Support/GetElementPtrTypeIterator.h
index ef92c95ee7..93dc41fbdc 100644
--- a/include/llvm/Support/GetElementPtrTypeIterator.h
+++ b/include/llvm/Support/GetElementPtrTypeIterator.h
@@ -83,15 +83,15 @@ namespace llvm {
typedef generic_gep_type_iterator<> gep_type_iterator;
inline gep_type_iterator gep_type_begin(const User *GEP) {
- return gep_type_iterator::begin(GEP->getOperand(0)->getType(),
- GEP->op_begin()+1);
+ return gep_type_iterator::begin
+ (GEP->getOperand(0)->getType()->getScalarType(), GEP->op_begin()+1);
}
inline gep_type_iterator gep_type_end(const User *GEP) {
return gep_type_iterator::end(GEP->op_end());
}
inline gep_type_iterator gep_type_begin(const User &GEP) {
- return gep_type_iterator::begin(GEP.getOperand(0)->getType(),
- GEP.op_begin()+1);
+ return gep_type_iterator::begin
+ (GEP.getOperand(0)->getType()->getScalarType(), GEP.op_begin()+1);
}
inline gep_type_iterator gep_type_end(const User &GEP) {
return gep_type_iterator::end(GEP.op_end());
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index 12958fa173..e3f4f00388 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -77,7 +77,11 @@ std::string escape(StringRef Input);
/// documents.
class Stream {
public:
+ /// @brief This keeps a reference to the string referenced by \p Input.
Stream(StringRef Input, SourceMgr &);
+
+ /// @brief This takes ownership of \p InputBuffer.
+ Stream(MemoryBuffer *InputBuffer, SourceMgr &);
~Stream();
document_iterator begin();
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index 2823af33b7..9000306857 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -71,7 +71,7 @@ namespace llvm
/// flushBuffer - Dump the contents of the buffer to Stream.
///
- void flushBuffer(void) {
+ void flushBuffer() {
if (Filled)
// Write the older portion of the buffer.
TheStream->write(Cur, BufferArray + BufferSize - Cur);
@@ -151,7 +151,7 @@ namespace llvm
/// flushBufferWithBanner - Force output of the buffer along with
/// a small header.
///
- void flushBufferWithBanner(void);
+ void flushBufferWithBanner();
private:
/// releaseStream - Delete the held stream if needed. Otherwise,
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 4570813ba6..d2e06114d8 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -435,7 +435,7 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
- assert(MI && MI->isSelect() && "MI must be a select instruction");
+ assert(MI && MI->getDesc().isSelect() && "MI must be a select instruction");
return true;
}
@@ -621,6 +621,26 @@ public:
return false;
}
+ /// \brief Get the base register and byte offset of a load/store instr.
+ virtual bool getLdStBaseRegImmOfs(MachineInstr *LdSt,
+ unsigned &BaseReg, unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ return false;
+ }
+
+ virtual bool shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
+ return false;
+ }
+
+ /// \brief Can this target fuse the given instructions if they are scheduled
+ /// adjacent.
+ virtual bool shouldScheduleAdjacent(MachineInstr* First,
+ MachineInstr *Second) const {
+ return false;
+ }
+
/// ReverseBranchCondition - Reverses the branch condition of the specified
/// condition list, returning false on success and true if it cannot be
/// reversed.
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index a2c97d782e..f1dd1f4bbe 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -49,6 +49,8 @@ namespace llvm {
cxa_guard_release,
/// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
memcpy_chk,
+ /// int abs(int j);
+ abs,
/// double acos(double x);
acos,
/// float acosf(float x);
@@ -153,6 +155,12 @@ namespace llvm {
fabsf,
/// long double fabsl(long double x);
fabsl,
+ /// int ffs(int i);
+ ffs,
+ /// int ffsl(long int i);
+ ffsl,
+ /// int ffsll(long long int i);
+ ffsll,
/// int fiprintf(FILE *stream, const char *format, ...);
fiprintf,
/// double floor(double x);
@@ -167,6 +175,8 @@ namespace llvm {
fmodf,
/// long double fmodl(long double x, long double y);
fmodl,
+ /// int fprintf(FILE *stream, const char *format, ...);
+ fprintf,
/// int fputc(int c, FILE *stream);
fputc,
/// int fputs(const char *s, FILE *stream);
@@ -178,6 +188,14 @@ namespace llvm {
fwrite,
/// int iprintf(const char *format, ...);
iprintf,
+ /// int isascii(int c);
+ isascii,
+ /// int isdigit(int c);
+ isdigit,
+ /// long int labs(long int j);
+ labs,
+ /// long long int llabs(long long int j);
+ llabs,
/// double log(double x);
log,
/// double log10(double x);
@@ -236,6 +254,8 @@ namespace llvm {
powf,
/// long double powl(long double x, long double y);
powl,
+ /// int printf(const char *format, ...);
+ printf,
/// int putchar(int c);
putchar,
/// int puts(const char *s);
@@ -270,6 +290,8 @@ namespace llvm {
sinl,
/// int siprintf(char *str, const char *format, ...);
siprintf,
+ /// int sprintf(char *str, const char *format, ...);
+ sprintf,
/// double sqrt(double x);
sqrt,
/// float sqrtf(float x);
@@ -337,6 +359,8 @@ namespace llvm {
tanhl,
/// long double tanl(long double x);
tanl,
+ /// int toascii(int c);
+ toascii,
/// double trunc(double x);
trunc,
/// float truncf(float x);
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 13a6fe37d7..fab63254d9 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -27,6 +27,7 @@ namespace llvm {
class MCExpr;
class MCSection;
class MCSymbol;
+ class MCSymbolRefExpr;
class MCStreamer;
class GlobalValue;
class TargetMachine;
@@ -108,13 +109,13 @@ public:
return 0;
}
- /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference
+ /// getTTypeGlobalReference - Return an MCExpr to use for a reference
/// to the specified global variable from exception handling information.
///
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
@@ -123,8 +124,8 @@ public:
///
const MCExpr *
- getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const;
virtual const MCSection *
getStaticCtorSection(unsigned Priority = 65535) const {
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 6db96d980b..3f22f47a0d 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -54,6 +54,13 @@ public:
return 0;
}
+ /// \brief True if the subtarget should run MachineScheduler after aggressive
+ /// coalescing.
+ ///
+ /// This currently replaces the SelectionDAG scheduler with the "source" order
+ /// scheduler. It does not yet disable the postRA scheduler.
+ virtual bool enableMachineScheduler() const;
+
// enablePostRAScheduler - If the target can benefit from post-regalloc
// scheduling and the specified optimization level meets the requirement
// return true to enable post-register-allocation scheduling. In
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 8e63aaa4e8..3558251a43 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -41,9 +41,7 @@ FunctionPass *createThreadSanitizerPass();
// BoundsChecking - This pass instruments the code to perform run-time bounds
// checking on loads, stores, and other memory intrinsics.
-// Penalty is the maximum run-time that is acceptable for the user.
-//
-FunctionPass *createBoundsCheckingPass(unsigned Penalty = 5);
+FunctionPass *createBoundsCheckingPass();
} // End llvm namespace
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index fde452bca2..6bb81be2fd 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -31,7 +31,8 @@ namespace llvm {
/// simplifier.
LibCallSimplifierImpl *Impl;
public:
- LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI);
+ LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI,
+ bool UnsafeFPShrink);
virtual ~LibCallSimplifier();
/// optimizeCall - Take the given call instruction and return a more
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4bb93ee88a..e8b3063588 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1065,9 +1065,15 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
if (PN > V2)
std::swap(Locs.first, Locs.second);
+ // Find the first incoming phi value not from its parent.
+ unsigned f = 0;
+ while (PN->getIncomingBlock(f) == PN->getParent() &&
+ f < PN->getNumIncomingValues()-1)
+ ++f;
+
AliasResult Alias =
- aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
- PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
+ aliasCheck(PN->getIncomingValue(f), PNSize, PNTBAAInfo,
+ PN2->getIncomingValueForBlock(PN->getIncomingBlock(f)),
V2Size, V2TBAAInfo);
if (Alias == MayAlias)
return MayAlias;
@@ -1096,7 +1102,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
ArePhisAssumedNoAlias = true;
}
- for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (i == f)
+ continue;
+
AliasResult ThisAlias =
aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 95ac5ea233..684da98ce2 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -145,22 +145,20 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
// Used to test the dependence analyzer.
-// Looks through the function, noting the first store instruction
-// and the first load instruction
-// (which always follows the first load in our tests).
-// Calls depends() and prints out the result.
+// Looks through the function, noting loads and stores.
+// Calls depends() on every possible pair and prints out the result.
// Ignores all other instructions.
static
void dumpExampleDependence(raw_ostream &OS, Function *F,
DependenceAnalysis *DA) {
for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
SrcI != SrcE; ++SrcI) {
- if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) {
+ if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
for (inst_iterator DstI = SrcI, DstE = inst_end(F);
DstI != DstE; ++DstI) {
- if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) {
+ if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
OS << "da analyze - ";
- if (Dependence *D = DA->depends(Src, Dst, true)) {
+ if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
D->dump(OS);
for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
if (D->isSplitable(Level)) {
@@ -173,7 +171,6 @@ void dumpExampleDependence(raw_ostream &OS, Function *F,
}
else
OS << "none!\n";
- return;
}
}
}
@@ -224,8 +221,8 @@ bool Dependence::isScalar(unsigned level) const {
//===----------------------------------------------------------------------===//
// FullDependence methods
-FullDependence::FullDependence(const Instruction *Source,
- const Instruction *Destination,
+FullDependence::FullDependence(Instruction *Source,
+ Instruction *Destination,
bool PossiblyLoopIndependent,
unsigned CommonLevels) :
Dependence(Source, Destination),
@@ -652,10 +649,10 @@ bool isLoadOrStore(const Instruction *I) {
static
-const Value *getPointerOperand(const Instruction *I) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+Value *getPointerOperand(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getPointerOperand();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
llvm_unreachable("Value is not load or store instruction");
return 0;
@@ -2221,7 +2218,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
FullDependence &Result) const {
DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
- unsigned BitWidth = Src->getType()->getIntegerBitWidth();
+ unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
APInt RunningGCD = APInt::getNullValue(BitWidth);
// Examine Src coefficients.
@@ -3197,42 +3194,39 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
// Goff, Kennedy, Tseng
// PLDI 1991
//
-// Care is required to keep the code below up to date w.r.t. this routine.
-Dependence *DependenceAnalysis::depends(const Instruction *Src,
- const Instruction *Dst,
+// Care is required to keep the routine below, getSplitIteration(),
+// up to date with respect to this routine.
+Dependence *DependenceAnalysis::depends(Instruction *Src,
+ Instruction *Dst,
bool PossiblyLoopIndependent) {
if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
(!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
// if both instructions don't reference memory, there's no dependence
return NULL;
- if (!isLoadOrStore(Src) || !isLoadOrStore(Dst))
+ if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
+ DEBUG(dbgs() << "can only handle simple loads and stores\n");
return new Dependence(Src, Dst);
+ }
- const Value *SrcPtr = getPointerOperand(Src);
- const Value *DstPtr = getPointerOperand(Dst);
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
case AliasAnalysis::MayAlias:
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
+ DEBUG(dbgs() << "can't analyze may or partial alias\n");
return new Dependence(Src, Dst);
case AliasAnalysis::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
+ DEBUG(dbgs() << "no alias\n");
return NULL;
case AliasAnalysis::MustAlias:
break; // The underlying objects alias; test accesses for dependence.
}
- const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
- const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
- if (!SrcGEP || !DstGEP)
- return new Dependence(Src, Dst); // missing GEP, assume dependence
-
- if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType())
- return new Dependence(Src, Dst); // different types, assume dependence
-
// establish loop nesting levels
establishNestingLevels(Src, Dst);
DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
@@ -3241,36 +3235,62 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
- // classify subscript pairs
- unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
+ DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
+
+ UsefulGEP =
+ isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
- for (unsigned SI = 0; SI < Pairs; ++SI) {
- Pair[SI].Loops.resize(MaxLevels + 1);
- Pair[SI].GroupLoops.resize(MaxLevels + 1);
- Pair[SI].Group.resize(Pairs);
- }
- Pairs = 0;
- for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
- SrcEnd = SrcGEP->idx_end(),
- DstIdx = DstGEP->idx_begin(),
- DstEnd = DstGEP->idx_end();
- SrcIdx != SrcEnd && DstIdx != DstEnd;
- ++SrcIdx, ++DstIdx, ++Pairs) {
- Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
- Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
- removeMatchingExtensions(&Pair[Pairs]);
- Pair[Pairs].Classification =
- classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
- Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
- Pair[Pairs].Loops);
- Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
- Pair[Pairs].Group.set(Pairs);
- DEBUG(dbgs() << " subscript " << Pairs << "\n");
- DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n");
- DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n");
- DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n");
+ if (UsefulGEP) {
+ DEBUG(dbgs() << " using GEPs\n");
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ DEBUG(dbgs() << " ignoring GEPs\n");
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
+ DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
+ DEBUG(dbgs() << " subscript " << P << "\n");
+ DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
+ DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
+ DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
DEBUG(dbgs() << "\tloops = ");
- DEBUG(dumpSmallBitVector(Pair[Pairs].Loops));
+ DEBUG(dumpSmallBitVector(Pair[P].Loops));
}
SmallBitVector Separable(Pairs);
@@ -3565,7 +3585,8 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src,
// though simplified since we know that the dependence exists.
// It's tedious, since we must go through all propagations, etc.
//
-// Care is required to keep this code up to date w.r.t. the code above.
+// Care is required to keep this code up to date with respect to the routine
+// above, depends().
//
// Generally, the dependence analyzer will be used to build
// a dependence graph for a function (basically a map from instructions
@@ -3608,50 +3629,65 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
assert(Dep && "expected a pointer to a Dependence");
assert(Dep->isSplitable(SplitLevel) &&
"Dep should be splitable at SplitLevel");
- const Instruction *Src = Dep->getSrc();
- const Instruction *Dst = Dep->getDst();
+ Instruction *Src = Dep->getSrc();
+ Instruction *Dst = Dep->getDst();
assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
assert(isLoadOrStore(Src));
assert(isLoadOrStore(Dst));
- const Value *SrcPtr = getPointerOperand(Src);
- const Value *DstPtr = getPointerOperand(Dst);
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *DstPtr = getPointerOperand(Dst);
assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
AliasAnalysis::MustAlias);
- const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
- const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
- assert(SrcGEP);
- assert(DstGEP);
- assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType());
// establish loop nesting levels
establishNestingLevels(Src, Dst);
FullDependence Result(Src, Dst, false, CommonLevels);
- // classify subscript pairs
- unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin();
+ // See if there are GEPs we can use.
+ bool UsefulGEP = false;
+ GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
+ GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
+ if (SrcGEP && DstGEP &&
+ SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
+ const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
+ const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
+ UsefulGEP =
+ isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ }
+ unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
- for (unsigned SI = 0; SI < Pairs; ++SI) {
- Pair[SI].Loops.resize(MaxLevels + 1);
- Pair[SI].GroupLoops.resize(MaxLevels + 1);
- Pair[SI].Group.resize(Pairs);
- }
- Pairs = 0;
- for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
- SrcEnd = SrcGEP->idx_end(),
- DstIdx = DstGEP->idx_begin(),
- DstEnd = DstGEP->idx_end();
- SrcIdx != SrcEnd && DstIdx != DstEnd;
- ++SrcIdx, ++DstIdx, ++Pairs) {
- Pair[Pairs].Src = SE->getSCEV(*SrcIdx);
- Pair[Pairs].Dst = SE->getSCEV(*DstIdx);
- Pair[Pairs].Classification =
- classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()),
- Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()),
- Pair[Pairs].Loops);
- Pair[Pairs].GroupLoops = Pair[Pairs].Loops;
- Pair[Pairs].Group.set(Pairs);
+ if (UsefulGEP) {
+ unsigned P = 0;
+ for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
+ SrcEnd = SrcGEP->idx_end(),
+ DstIdx = DstGEP->idx_begin();
+ SrcIdx != SrcEnd;
+ ++SrcIdx, ++DstIdx, ++P) {
+ Pair[P].Src = SE->getSCEV(*SrcIdx);
+ Pair[P].Dst = SE->getSCEV(*DstIdx);
+ }
+ }
+ else {
+ const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+ const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+ Pair[0].Src = SrcSCEV;
+ Pair[0].Dst = DstSCEV;
+ }
+
+ for (unsigned P = 0; P < Pairs; ++P) {
+ Pair[P].Loops.resize(MaxLevels + 1);
+ Pair[P].GroupLoops.resize(MaxLevels + 1);
+ Pair[P].Group.resize(Pairs);
+ removeMatchingExtensions(&Pair[P]);
+ Pair[P].Classification =
+ classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
+ Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
+ Pair[P].Loops);
+ Pair[P].GroupLoops = Pair[P].Loops;
+ Pair[P].Group.set(P);
}
SmallBitVector Separable(Pairs);
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 449b7ee87b..f486937654 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -51,6 +51,9 @@ public:
/// whether any of the passes modifies the module, and if so, return true.
bool runOnModule(Module &M);
+ using ModulePass::doInitialization;
+ using ModulePass::doFinalization;
+
bool doInitialization(CallGraph &CG);
bool doFinalization(CallGraph &CG);
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 5f51f775f1..458e25503d 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -49,7 +49,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
int Threshold;
int Cost;
- const bool AlwaysInline;
bool IsCallerRecursive;
bool IsRecursiveCall;
@@ -128,7 +127,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
- AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0),
@@ -142,7 +140,6 @@ public:
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
- bool isAlwaysInline() { return AlwaysInline; }
// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.
@@ -281,9 +278,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
Ty->getPrimitiveSizeInBits());
}
- // We will happily inline static alloca instructions or dynamic alloca
- // instructions in always-inline situations.
- if (AlwaysInline || I.isStaticAlloca())
+ // We will happily inline static alloca instructions.
+ if (I.isStaticAlloca())
return Base::visitAlloca(I);
// FIXME: This is overly conservative. Dynamic allocas are inefficient for
@@ -743,7 +739,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
// Check if we've past the threshold so we don't spin in huge basic
// blocks that will never inline.
- if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ if (Cost > (Threshold + VectorBonus))
return false;
}
@@ -794,7 +790,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
-/// some artifact of the rountine.
+/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
++NumCallsAnalyzed;
@@ -805,70 +801,69 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
int SingleBBBonus = Threshold / 2;
Threshold += SingleBBBonus;
- // Unless we are always-inlining, perform some tweaks to the cost and
- // threshold based on the direct callsite information.
- if (!AlwaysInline) {
- // We want to more aggressively inline vector-dense kernels, so up the
- // threshold, and we'll lower it if the % of vector instructions gets too
- // low.
- assert(NumInstructions == 0);
- assert(NumVectorInstructions == 0);
- FiftyPercentVectorBonus = Threshold;
- TenPercentVectorBonus = Threshold / 2;
-
- // Give out bonuses per argument, as the instructions setting them up will
- // be gone after inlining.
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (TD && CS.isByValArgument(I)) {
- // We approximate the number of loads and stores needed by dividing the
- // size of the byval type by the target's pointer size.
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = TD->getPointerSizeInBits();
- // Ceiling division.
- unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
-
- // If it generates more than 8 stores it is likely to be expanded as an
- // inline memcpy so we take that as an upper bound. Otherwise we assume
- // one load and one store per word copied.
- // FIXME: The maxStoresPerMemcpy setting from the target should be used
- // here instead of a magic number of 8, but it's not available via
- // DataLayout.
- NumStores = std::min(NumStores, 8U);
-
- Cost -= 2 * NumStores * InlineConstants::InstrCost;
- } else {
- // For non-byval arguments subtract off one instruction per call
- // argument.
- Cost -= InlineConstants::InstrCost;
- }
+ // Perform some tweaks to the cost and threshold based on the direct
+ // callsite information.
+
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+ FiftyPercentVectorBonus = Threshold;
+ TenPercentVectorBonus = Threshold / 2;
+
+ // Give out bonuses per argument, as the instructions setting them up will
+ // be gone after inlining.
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+ if (TD && CS.isByValArgument(I)) {
+ // We approximate the number of loads and stores needed by dividing the
+ // size of the byval type by the target's pointer size.
+ PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = TD->getPointerSizeInBits();
+ // Ceiling division.
+ unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+ // If it generates more than 8 stores it is likely to be expanded as an
+ // inline memcpy so we take that as an upper bound. Otherwise we assume
+ // one load and one store per word copied.
+ // FIXME: The maxStoresPerMemcpy setting from the target should be used
+ // here instead of a magic number of 8, but it's not available via
+ // DataLayout.
+ NumStores = std::min(NumStores, 8U);
+
+ Cost -= 2 * NumStores * InlineConstants::InstrCost;
+ } else {
+ // For non-byval arguments subtract off one instruction per call
+ // argument.
+ Cost -= InlineConstants::InstrCost;
}
+ }
- // If there is only one call of the function, and it has internal linkage,
- // the cost of inlining it drops dramatically.
- if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
- Cost += InlineConstants::LastCallToStaticBonus;
-
- // If the instruction after the call, or if the normal destination of the
- // invoke is an unreachable instruction, the function is noreturn. As such,
- // there is little point in inlining this unless there is literally zero
- // cost.
- Instruction *Instr = CS.getInstruction();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
- if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Threshold = 1;
- } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically.
+ if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+ Cost += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this unless there is literally zero
+ // cost.
+ Instruction *Instr = CS.getInstruction();
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
Threshold = 1;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+ Threshold = 1;
- // If this function uses the coldcc calling convention, prefer not to inline
- // it.
- if (F.getCallingConv() == CallingConv::Cold)
- Cost += InlineConstants::ColdccPenalty;
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
- // Check if we're done. This can happen due to bonuses and penalties.
- if (Cost > Threshold)
- return false;
- }
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost > Threshold)
+ return false;
if (F.empty())
return true;
@@ -930,7 +925,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ if (Cost > (Threshold + VectorBonus))
break;
BasicBlock *BB = BBWorklist[Idx];
@@ -1015,7 +1010,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Threshold += VectorBonus;
- return AlwaysInline || Cost < Threshold;
+ return Cost < Threshold;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1040,10 +1035,22 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
int Threshold) {
+ // Cannot inline indirect calls.
+ if (!Callee)
+ return llvm::InlineCost::getNever();
+
+ // Calls to functions with always-inline attributes should be inlined
+ // whenever possible.
+ if (Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline)) {
+ if (isInlineViable(*Callee))
+ return llvm::InlineCost::getAlways();
+ return llvm::InlineCost::getNever();
+ }
+
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
- if (!Callee || Callee->mayBeOverridden() ||
+ if (Callee->mayBeOverridden() ||
Callee->getFnAttributes().hasAttribute(Attributes::NoInline) ||
CS.isNoInline())
return llvm::InlineCost::getNever();
@@ -1059,9 +1066,36 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
// Check if there was a reason to force inlining or no inlining.
if (!ShouldInline && CA.getCost() < CA.getThreshold())
return InlineCost::getNever();
- if (ShouldInline && (CA.isAlwaysInline() ||
- CA.getCost() >= CA.getThreshold()))
+ if (ShouldInline && CA.getCost() >= CA.getThreshold())
return InlineCost::getAlways();
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
+
+bool InlineCostAnalyzer::isInlineViable(Function &F) {
+ bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain an indirect branch.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return false;
+
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ CallSite CS(II);
+ if (!CS)
+ continue;
+
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b3d62487fc..a76e5ad1b8 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2065,8 +2065,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (A && C && (A == C || A == D || B == C || B == D) &&
NoLHSWrapProblem && NoRHSWrapProblem) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
- Value *Y = (A == C || A == D) ? B : A;
- Value *Z = (C == A || C == B) ? D : C;
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
return V;
}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index a60e4aa41c..d8ffe8fb73 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -744,7 +744,7 @@ lltok::Kind LLLexer::Lex0x() {
/// HexFP128Constant 0xL[0-9A-Fa-f]+
/// HexPPC128Constant 0xM[0-9A-Fa-f]+
lltok::Kind LLLexer::LexDigitOrNegative() {
- // If the letter after the negative is a number, this is probably a label.
+ // If the letter after the negative is not a number, this is probably a label.
if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
// Okay, this is not a number after the -, it's probably a label.
if (const char *End = isLabelTail(CurPtr)) {
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index ac803c5783..41cca68576 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -52,10 +52,10 @@ bool LLParser::ValidateEndOfModule() {
I != E; ++I) {
Instruction *Inst = I->first;
const std::vector<MDRef> &MDList = I->second;
-
+
for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
unsigned SlotNo = MDList[i].MDSlot;
-
+
if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
return Error(MDList[i].Loc, "use of undefined metadata '!" +
Twine(SlotNo) + "'");
@@ -64,8 +64,8 @@ bool LLParser::ValidateEndOfModule() {
}
ForwardRefInstMetadata.clear();
}
-
-
+
+
// If there are entries in ForwardRefBlockAddresses at this point, they are
// references after the function was defined. Resolve those now.
while (!ForwardRefBlockAddresses.empty()) {
@@ -76,19 +76,19 @@ bool LLParser::ValidateEndOfModule() {
TheFn = M->getFunction(Fn.StrVal);
else if (Fn.UIntVal < NumberedVals.size())
TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
-
+
if (TheFn == 0)
return Error(Fn.Loc, "unknown function referenced by blockaddress");
-
+
// Resolve all these references.
- if (ResolveForwardRefBlockAddresses(TheFn,
+ if (ResolveForwardRefBlockAddresses(TheFn,
ForwardRefBlockAddresses.begin()->second,
0))
return true;
-
+
ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
}
-
+
for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
if (NumberedTypes[i].second.isValid())
return Error(NumberedTypes[i].second,
@@ -123,7 +123,7 @@ bool LLParser::ValidateEndOfModule() {
return false;
}
-bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
std::vector<std::pair<ValID, GlobalValue*> > &Refs,
PerFunctionState *PFS) {
// Loop over all the references, resolving them.
@@ -141,11 +141,11 @@ bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
Res = dyn_cast_or_null<BasicBlock>(
TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
}
-
+
if (Res == 0)
return Error(Refs[i].first.Loc,
"referenced value is not a basic block");
-
+
// Get the BlockAddress for this and update references to use it.
BlockAddress *BA = BlockAddress::get(TheFn, Res);
Refs[i].second->replaceAllUsesWith(BA);
@@ -302,11 +302,11 @@ bool LLParser::ParseUnnamedType() {
if (TypeID >= NumberedTypes.size())
NumberedTypes.resize(TypeID+1);
-
+
Type *Result = 0;
if (ParseStructDefinition(TypeLoc, "",
NumberedTypes[TypeID], Result)) return true;
-
+
if (!isa<StructType>(Result)) {
std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
if (Entry.first)
@@ -329,11 +329,11 @@ bool LLParser::ParseNamedType() {
if (ParseToken(lltok::equal, "expected '=' after name") ||
ParseToken(lltok::kw_type, "expected 'type' after name"))
return true;
-
+
Type *Result = 0;
if (ParseStructDefinition(NameLoc, Name,
NamedTypes[Name], Result)) return true;
-
+
if (!isa<StructType>(Result)) {
std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
if (Entry.first)
@@ -341,7 +341,7 @@ bool LLParser::ParseNamedType() {
Entry.first = Result;
Entry.second = SMLoc();
}
-
+
return false;
}
@@ -473,7 +473,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
// Otherwise, create MDNode forward reference.
MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
-
+
if (NumberedMetadata.size() <= MID)
NumberedMetadata.resize(MID+1);
NumberedMetadata[MID] = FwdNode;
@@ -498,7 +498,7 @@ bool LLParser::ParseNamedMetadata() {
do {
if (ParseToken(lltok::exclaim, "Expected '!' here"))
return true;
-
+
MDNode *N = 0;
if (ParseMDNodeID(N)) return true;
NMD->addOperand(N);
@@ -530,7 +530,7 @@ bool LLParser::ParseStandaloneMetadata() {
return true;
MDNode *Init = MDNode::get(Context, Elts);
-
+
// See if this was forward referenced, if so, handle it.
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
FI = ForwardRefMDNodes.find(MetadataID);
@@ -539,7 +539,7 @@ bool LLParser::ParseStandaloneMetadata() {
Temp->replaceAllUsesWith(Init);
MDNode::deleteTemporary(Temp);
ForwardRefMDNodes.erase(FI);
-
+
assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
} else {
if (MetadataID >= NumberedMetadata.size())
@@ -779,7 +779,9 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, 0, Name);
+ GlobalValue::ExternalWeakLinkage, 0, Name,
+ 0, GlobalVariable::NotThreadLocal,
+ PTy->getAddressSpace());
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
@@ -1205,7 +1207,7 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
}
/// ParseOptionalCommaAlign
-/// ::=
+/// ::=
/// ::= ',' align 4
///
/// This returns with AteExtraComma set to true if it ate an excess comma at the
@@ -1219,7 +1221,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
AteExtraComma = true;
return false;
}
-
+
if (Lex.getKind() != lltok::kw_align)
return Error(Lex.getLoc(), "expected metadata or 'align'");
@@ -1287,7 +1289,7 @@ bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
bool &AteExtraComma) {
AteExtraComma = false;
-
+
if (Lex.getKind() != lltok::comma)
return TokError("expected ',' as start of index list");
@@ -1343,7 +1345,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
case lltok::LocalVar: {
// Type ::= %foo
std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
-
+
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
if (Entry.first == 0) {
@@ -1360,7 +1362,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
if (Lex.getUIntVal() >= NumberedTypes.size())
NumberedTypes.resize(Lex.getUIntVal()+1);
std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
-
+
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
if (Entry.first == 0) {
@@ -1569,7 +1571,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
SmallVector<Type*, 8> Elts;
if (ParseStructBody(Elts)) return true;
-
+
Result = StructType::get(Context, Elts, Packed);
return false;
}
@@ -1581,20 +1583,20 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
// If the type was already defined, diagnose the redefinition.
if (Entry.first && !Entry.second.isValid())
return Error(TypeLoc, "redefinition of type");
-
+
// If we have opaque, just return without filling in the definition for the
// struct. This counts as a definition as far as the .ll file goes.
if (EatIfPresent(lltok::kw_opaque)) {
// This type is being defined, so clear the location to indicate this.
Entry.second = SMLoc();
-
+
// If this type number has never been uttered, create it.
if (Entry.first == 0)
Entry.first = StructType::create(Context, Name);
ResultTy = Entry.first;
return false;
}
-
+
// If the type starts with '<', then it is either a packed struct or a vector.
bool isPacked = EatIfPresent(lltok::less);
@@ -1604,27 +1606,27 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
if (Lex.getKind() != lltok::lbrace) {
if (Entry.first)
return Error(TypeLoc, "forward references to non-struct type");
-
+
ResultTy = 0;
if (isPacked)
return ParseArrayVectorType(ResultTy, true);
return ParseType(ResultTy);
}
-
+
// This type is being defined, so clear the location to indicate this.
Entry.second = SMLoc();
-
+
// If this type number has never been uttered, create it.
if (Entry.first == 0)
Entry.first = StructType::create(Context, Name);
-
+
StructType *STy = cast<StructType>(Entry.first);
-
+
SmallVector<Type*, 8> Body;
if (ParseStructBody(Body) ||
(isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
return true;
-
+
STy->setBody(Body, isPacked);
ResultTy = STy;
return false;
@@ -1697,8 +1699,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if ((unsigned)Size != Size)
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc,
- "vector element type must be fp, integer or a pointer to these types");
+ return Error(TypeLoc, "invalid vector element type");
Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
@@ -1755,18 +1756,18 @@ bool LLParser::PerFunctionState::FinishFunction() {
FunctionID.Kind = ValID::t_GlobalID;
FunctionID.UIntVal = FunctionNumber;
}
-
+
std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
if (FRBAI != P.ForwardRefBlockAddresses.end()) {
// Resolve all these references.
if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
return true;
-
+
P.ForwardRefBlockAddresses.erase(FRBAI);
}
}
-
+
if (!ForwardRefVals.empty())
return P.Error(ForwardRefVals.begin()->second.second,
"use of undefined value '%" + ForwardRefVals.begin()->first +
@@ -2139,19 +2140,19 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ValID Fn, Label;
LocTy FnLoc, LabelLoc;
-
+
if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
ParseValID(Fn) ||
ParseToken(lltok::comma, "expected comma in block address expression")||
ParseValID(Label) ||
ParseToken(lltok::rparen, "expected ')' in block address expression"))
return true;
-
+
if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
return Error(Fn.Loc, "expected function name in blockaddress");
if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
return Error(Label.Loc, "expected basic block name in blockaddress");
-
+
// Make a global variable as a placeholder for this reference.
GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
false, GlobalValue::InternalLinkage,
@@ -2161,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.Kind = ValID::t_Constant;
return false;
}
-
+
case lltok::kw_trunc:
case lltok::kw_zext:
case lltok::kw_sext:
@@ -2541,7 +2542,7 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return (V == 0);
case ValID::t_InlineAsm: {
PointerType *PTy = dyn_cast<PointerType>(Ty);
- FunctionType *FTy =
+ FunctionType *FTy =
PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
return Error(ID.Loc, "invalid type for inline asm constraint string");
@@ -2630,13 +2631,13 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
"initializer with struct type has wrong # elements");
if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
return Error(ID.Loc, "packed'ness of initializer and type don't match");
-
+
// Verify that the elements are compatible with the structtype.
for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
return Error(ID.Loc, "element " + Twine(i) +
" of struct initializer doesn't match struct element type");
-
+
V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts,
ID.UIntVal));
} else
@@ -2792,7 +2793,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Attributes::get(RetType->getContext(),
FuncAttrs)));
- AttrListPtr PAL = AttrListPtr::get(Attrs);
+ AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
if (PAL.getParamAttributes(1).hasAttribute(Attributes::StructRet) &&
!RetType->isVoidTy())
@@ -2816,7 +2817,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (Fn->getType() != PFT)
return Error(FRVI->second.second, "invalid forward reference to "
"function '" + FunctionName + "' with wrong type!");
-
+
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
// Reject redefinitions.
@@ -2885,13 +2886,13 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
int FunctionNumber = -1;
if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
-
+
PerFunctionState PFS(*this, Fn, FunctionNumber);
// We need at least one basic block.
if (Lex.getKind() == lltok::rbrace)
return TokError("function body requires at least one basic block");
-
+
while (Lex.getKind() != lltok::rbrace)
if (ParseBasicBlock(PFS)) return true;
@@ -2959,7 +2960,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// *must* be followed by metadata.
if (ParseInstructionMetadata(Inst, &PFS))
return true;
- break;
+ break;
}
// Set the name on the instruction.
@@ -3002,9 +3003,9 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
bool NUW = EatIfPresent(lltok::kw_nuw);
bool NSW = EatIfPresent(lltok::kw_nsw);
if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
-
+
if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
-
+
if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
return false;
@@ -3124,12 +3125,12 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
if (ParseType(Ty, true /*void allowed*/)) return true;
Type *ResType = PFS.getFunction().getReturnType();
-
+
if (Ty->isVoidTy()) {
if (!ResType->isVoidTy())
return Error(TypeLoc, "value doesn't match function result type '" +
getTypeString(ResType) + "'");
-
+
Inst = ReturnInst::Create(Context);
return false;
}
@@ -3140,7 +3141,7 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
if (ResType != RV->getType())
return Error(TypeLoc, "value doesn't match function result type '" +
getTypeString(ResType) + "'");
-
+
Inst = ReturnInst::Create(Context, RV);
return false;
}
@@ -3202,7 +3203,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after case value") ||
ParseTypeAndBasicBlock(DestBB, PFS))
return true;
-
+
if (!SeenCases.insert(Constant))
return Error(CondLoc, "duplicate case value in switch");
if (!isa<ConstantInt>(Constant))
@@ -3230,26 +3231,26 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
return true;
-
+
if (!Address->getType()->isPointerTy())
return Error(AddrLoc, "indirectbr address must have pointer type");
-
+
// Parse the destination list.
SmallVector<BasicBlock*, 16> DestList;
-
+
if (Lex.getKind() != lltok::rsquare) {
BasicBlock *DestBB;
if (ParseTypeAndBasicBlock(DestBB, PFS))
return true;
DestList.push_back(DestBB);
-
+
while (EatIfPresent(lltok::comma)) {
if (ParseTypeAndBasicBlock(DestBB, PFS))
return true;
DestList.push_back(DestBB);
}
}
-
+
if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
return true;
@@ -3349,7 +3350,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
FnAttrs)));
// Finish off the Attributes and check them
- AttrListPtr PAL = AttrListPtr::get(Attrs);
+ AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
II->setCallingConv(CC);
@@ -3751,7 +3752,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
FnAttrs)));
// Finish off the Attributes and check them
- AttrListPtr PAL = AttrListPtr::get(Attrs);
+ AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
CallInst *CI = CallInst::Create(Callee, Args);
CI->setTailCall(isTail);
@@ -3796,7 +3797,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseLoad
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
-/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
+/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
@@ -4032,9 +4033,6 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- if (Val && Val->getType()->isVectorTy() && Indices.size() != 1)
- return Error(EltLoc, "vector getelementptrs must have a single index");
-
if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
@@ -4073,7 +4071,7 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(Val1, Loc1, PFS) ||
ParseIndexList(Indices, AteExtraComma))
return true;
-
+
if (!Val0->getType()->isAggregateType())
return Error(Loc0, "insertvalue operand must be aggregate type");
@@ -4103,7 +4101,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
Elts.push_back(0);
continue;
}
-
+
Value *V = 0;
if (ParseTypeAndValue(V, PFS)) return true;
Elts.push_back(V);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index c6bbdb27ae..9f9672d67b 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -55,7 +55,7 @@ namespace llvm {
t_ConstantStruct, // Value in ConstantStructElts.
t_PackedConstantStruct // Value in ConstantStructElts.
} Kind;
-
+
LLLexer::LocTy Loc;
unsigned UIntVal;
std::string StrVal, StrVal2;
@@ -65,23 +65,23 @@ namespace llvm {
MDNode *MDNodeVal;
MDString *MDStringVal;
Constant **ConstantStructElts;
-
+
ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
~ValID() {
if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
delete [] ConstantStructElts;
}
-
+
bool operator<(const ValID &RHS) const {
if (Kind == t_LocalID || Kind == t_GlobalID)
return UIntVal < RHS.UIntVal;
assert((Kind == t_LocalName || Kind == t_GlobalName ||
- Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
+ Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
"Ordering not defined for this ValID kind yet");
return StrVal < RHS.StrVal;
}
};
-
+
class LLParser {
public:
typedef LLLexer::LocTy LocTy;
@@ -89,7 +89,7 @@ namespace llvm {
LLVMContext &Context;
LLLexer Lex;
Module *M;
-
+
// Instruction metadata resolution. Each instruction can have a list of
// MDRef info associated with them.
//
@@ -110,7 +110,7 @@ namespace llvm {
// have processed a use of the type but not a definition yet.
StringMap<std::pair<Type*, LocTy> > NamedTypes;
std::vector<std::pair<Type*, LocTy> > NumberedTypes;
-
+
std::vector<TrackingVH<MDNode> > NumberedMetadata;
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
@@ -118,14 +118,14 @@ namespace llvm {
std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
std::vector<GlobalValue*> NumberedVals;
-
+
// References to blockaddress. The key is the function ValID, the value is
// a list of references to blocks in that function.
std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
ForwardRefBlockAddresses;
-
+
public:
- LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
+ LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
M(m) {}
bool Run();
@@ -241,7 +241,7 @@ namespace llvm {
std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
std::vector<Value*> NumberedVals;
-
+
/// FunctionNumber - If this is an unnamed function, this is the slot
/// number of it, otherwise it is -1.
int FunctionNumber;
@@ -375,8 +375,8 @@ namespace llvm {
int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
-
- bool ResolveForwardRefBlockAddresses(Function *TheFn,
+
+ bool ResolveForwardRefBlockAddresses(Function *TheFn,
std::vector<std::pair<ValID, GlobalValue*> > &Refs,
PerFunctionState *PFS);
};
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 15844c0041..448504c89e 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -30,7 +30,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMModuleRef *OutModule,
char **OutMessage) {
std::string Message;
-
+
*OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef),
&Message));
if (!*OutModule) {
@@ -38,19 +38,19 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
*OutMessage = strdup(Message.c_str());
return 1;
}
-
+
return 0;
}
/* Reads a module from the specified path, returning via the OutModule parameter
a module provider which performs lazy deserialization. Returns 0 on success.
- Optionally returns a human-readable error message via OutMessage. */
+ Optionally returns a human-readable error message via OutMessage. */
LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutM,
char **OutMessage) {
std::string Message;
-
+
*OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
&Message));
if (!*OutM) {
@@ -58,7 +58,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
*OutMessage = strdup(Message.c_str());
return 1;
}
-
+
return 0;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 96b3925ed7..9b28c9d60a 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -487,7 +487,7 @@ bool BitcodeReader::ParseAttributeBlock() {
Attributes::get(Context, B)));
}
- MAttributes.push_back(AttrListPtr::get(Attrs));
+ MAttributes.push_back(AttrListPtr::get(Context, Attrs));
Attrs.clear();
break;
}
@@ -607,7 +607,7 @@ bool BitcodeReader::ParseTypeTableBody() {
else
break;
}
-
+
ResultTy = getTypeByID(Record[2]);
if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
return Error("invalid type in function type");
@@ -626,7 +626,7 @@ bool BitcodeReader::ParseTypeTableBody() {
else
break;
}
-
+
ResultTy = getTypeByID(Record[1]);
if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
return Error("invalid type in function type");
@@ -657,10 +657,10 @@ bool BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
return Error("Invalid STRUCT type record");
-
+
if (NumRecords >= TypeList.size())
return Error("invalid TYPE table");
-
+
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
if (Res) {
@@ -669,7 +669,7 @@ bool BitcodeReader::ParseTypeTableBody() {
} else // Otherwise, create a new struct.
Res = StructType::create(Context, TypeName);
TypeName.clear();
-
+
SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
@@ -689,7 +689,7 @@ bool BitcodeReader::ParseTypeTableBody() {
if (NumRecords >= TypeList.size())
return Error("invalid TYPE table");
-
+
// Check to see if this was forward referenced, if so fill in the temp.
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
if (Res) {
@@ -700,7 +700,7 @@ bool BitcodeReader::ParseTypeTableBody() {
TypeName.clear();
ResultTy = Res;
break;
- }
+ }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
@@ -1006,7 +1006,7 @@ bool BitcodeReader::ParseConstants() {
APInt VInt = ReadWideAPInt(Record,
cast<IntegerType>(CurTy)->getBitWidth());
V = ConstantInt::get(Context, VInt);
-
+
break;
}
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
@@ -1073,10 +1073,10 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_DATA: {// DATA: [n x value]
if (Record.empty())
return Error("Invalid CST_DATA record");
-
+
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
-
+
if (EltTy->isIntegerTy(8)) {
SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
@@ -1182,10 +1182,11 @@ bool BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#]
if (Record.size() < 3) return Error("Invalid CE_SELECT record");
- V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
- Type::getInt1Ty(Context)),
- ValueList.getConstantFwdRef(Record[1],CurTy),
- ValueList.getConstantFwdRef(Record[2],CurTy));
+ V = ConstantExpr::getSelect(
+ ValueList.getConstantFwdRef(Record[0],
+ Type::getInt1Ty(Context)),
+ ValueList.getConstantFwdRef(Record[1],CurTy),
+ ValueList.getConstantFwdRef(Record[2],CurTy));
break;
case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
@@ -1193,7 +1194,8 @@ bool BitcodeReader::ParseConstants() {
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
+ Type::getInt32Ty(Context));
V = ConstantExpr::getExtractElement(Op0, Op1);
break;
}
@@ -1204,7 +1206,8 @@ bool BitcodeReader::ParseConstants() {
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
- Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
+ Type::getInt32Ty(Context));
V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
break;
}
@@ -1324,7 +1327,7 @@ bool BitcodeReader::ParseConstants() {
V = FwdRef;
}
break;
- }
+ }
}
ValueList.AssignValue(V, NextCstNo);
@@ -1348,7 +1351,7 @@ bool BitcodeReader::ParseUseLists() {
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
-
+
// Read all the records.
while (1) {
unsigned Code = Stream.ReadCode();
@@ -1357,7 +1360,7 @@ bool BitcodeReader::ParseUseLists() {
return Error("Error at end of use-list table block");
return false;
}
-
+
if (Code == bitc::ENTER_SUBBLOCK) {
// No known subblocks, always skip them.
Stream.ReadSubBlockID();
@@ -1365,12 +1368,12 @@ bool BitcodeReader::ParseUseLists() {
return Error("Malformed block record");
continue;
}
-
+
if (Code == bitc::DEFINE_ABBREV) {
Stream.ReadAbbrevRecord();
continue;
}
-
+
// Read a use list record.
Record.clear();
switch (Stream.ReadRecord(Code, Record)) {
@@ -1927,7 +1930,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned CurBBNo = 0;
DebugLoc LastLoc;
-
+
// Read all the records.
SmallVector<uint64_t, 64> Record;
while (1) {
@@ -1982,24 +1985,24 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
FunctionBBs[i] = BasicBlock::Create(Context, "", F);
CurBB = FunctionBBs[0];
continue;
-
+
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
// This record indicates that the last instruction is at the same
// location as the previous instruction with a location.
I = 0;
-
+
// Get the last instruction emitted.
if (CurBB && !CurBB->empty())
I = &CurBB->back();
else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
-
+
if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record");
I->setDebugLoc(LastLoc);
I = 0;
continue;
-
+
case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
I = 0; // Get the last instruction emitted.
if (CurBB && !CurBB->empty())
@@ -2009,10 +2012,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = &FunctionBBs[CurBBNo-1]->back();
if (I == 0 || Record.size() < 4)
return Error("Invalid FUNC_CODE_DEBUG_LOC record");
-
+
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
-
+
MDNode *Scope = 0, *IA = 0;
if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
@@ -2280,10 +2283,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
}
case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...]
- // Check magic
+ // Check magic
if ((Record[0] >> 16) == SWITCH_INST_MAGIC) {
// New SwitchInst format with case ranges.
-
+
Type *OpTy = getTypeByID(Record[1]);
unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth();
@@ -2293,17 +2296,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return Error("Invalid SWITCH record");
unsigned NumCases = Record[4];
-
+
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
InstructionList.push_back(SI);
-
+
unsigned CurIdx = 5;
for (unsigned i = 0; i != NumCases; ++i) {
IntegersSubsetToBB CaseBuilder;
unsigned NumItems = Record[CurIdx++];
for (unsigned ci = 0; ci != NumItems; ++ci) {
bool isSingleNumber = Record[CurIdx++];
-
+
APInt Low;
unsigned ActiveWords = 1;
if (ValueBitWidth > 64)
@@ -2319,7 +2322,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
APInt High =
ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords),
ValueBitWidth);
-
+
CaseBuilder.add(IntItem::fromType(OpTy, Low),
IntItem::fromType(OpTy, High));
CurIdx += ActiveWords;
@@ -2327,7 +2330,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
CaseBuilder.add(IntItem::fromType(OpTy, Low));
}
BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]);
- IntegersSubset Case = CaseBuilder.getCase();
+ IntegersSubset Case = CaseBuilder.getCase();
SI->addCase(Case, DestBB);
}
uint16_t Hash = SI->hash();
@@ -2336,9 +2339,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = SI;
break;
}
-
+
// Old SwitchInst format without case ranges.
-
+
if (Record.size() < 3 || (Record.size() & 1) == 0)
return Error("Invalid SWITCH record");
Type *OpTy = getTypeByID(Record[0]);
@@ -2383,7 +2386,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = IBI;
break;
}
-
+
case bitc::FUNC_CODE_INST_INVOKE: {
// INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
if (Record.size() < 4) return Error("Invalid INVOKE record");
@@ -2542,7 +2545,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
OpNum+4 != Record.size())
return Error("Invalid LOADATOMIC record");
-
+
AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
if (Ordering == NotAtomic || Ordering == Release ||
@@ -2758,15 +2761,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
unsigned BlockIdx = RefList[i].first;
if (BlockIdx >= FunctionBBs.size())
return Error("Invalid blockaddress block #");
-
+
GlobalVariable *FwdRef = RefList[i].second;
FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx]));
FwdRef->eraseFromParent();
}
-
+
BlockAddrFwdRefs.erase(BAFRI);
}
-
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
MDValueList.shrinkTo(ModuleMDValueListSize);
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 3d5c0eb4de..f3b516ecfd 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -27,14 +27,14 @@
namespace llvm {
class MemoryBuffer;
class LLVMContext;
-
+
//===----------------------------------------------------------------------===//
// BitcodeReaderValueList Class
//===----------------------------------------------------------------------===//
class BitcodeReaderValueList {
std::vector<WeakVH> ValuePtrs;
-
+
/// ResolveConstants - As we resolve forward-referenced constants, we add
/// information about them to this vector. This allows us to resolve them in
/// bulk instead of resolving each reference at a time. See the code in
@@ -57,17 +57,17 @@ public:
void push_back(Value *V) {
ValuePtrs.push_back(V);
}
-
+
void clear() {
assert(ResolveConstants.empty() && "Constants not resolved?");
ValuePtrs.clear();
}
-
+
Value *operator[](unsigned i) const {
assert(i < ValuePtrs.size());
return ValuePtrs[i];
}
-
+
Value *back() const { return ValuePtrs.back(); }
void pop_back() { ValuePtrs.pop_back(); }
bool empty() const { return ValuePtrs.empty(); }
@@ -75,12 +75,12 @@ public:
assert(N <= size() && "Invalid shrinkTo request!");
ValuePtrs.resize(N);
}
-
+
Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
Value *getValueFwdRef(unsigned Idx, Type *Ty);
-
+
void AssignValue(Value *V, unsigned Idx);
-
+
/// ResolveConstantForwardRefs - Once all constants are read, this method bulk
/// resolves any forward references.
void ResolveConstantForwardRefs();
@@ -93,7 +93,7 @@ public:
class BitcodeReaderMDValueList {
std::vector<WeakVH> MDValuePtrs;
-
+
LLVMContext &Context;
public:
BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
@@ -106,12 +106,12 @@ public:
Value *back() const { return MDValuePtrs.back(); }
void pop_back() { MDValuePtrs.pop_back(); }
bool empty() const { return MDValuePtrs.empty(); }
-
+
Value *operator[](unsigned i) const {
assert(i < MDValuePtrs.size());
return MDValuePtrs[i];
}
-
+
void shrinkTo(unsigned N) {
assert(N <= size() && "Invalid shrinkTo request!");
MDValuePtrs.resize(N);
@@ -131,9 +131,9 @@ class BitcodeReader : public GVMaterializer {
DataStreamer *LazyStreamer;
uint64_t NextUnreadBit;
bool SeenValueSymbolTable;
-
+
const char *ErrorString;
-
+
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
@@ -142,38 +142,38 @@ class BitcodeReader : public GVMaterializer {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
-
+
/// MAttributes - The set of attributes by index. Index zero in the
/// file is for null, and is thus not represented here. As such all indices
/// are off by one.
std::vector<AttrListPtr> MAttributes;
-
+
/// FunctionBBs - While parsing a function body, this is a list of the basic
/// blocks for the function.
std::vector<BasicBlock*> FunctionBBs;
-
+
// When reading the module header, this list is populated with functions that
// have bodies later in the file.
std::vector<Function*> FunctionsWithBodies;
- // When intrinsic functions are encountered which require upgrading they are
+ // When intrinsic functions are encountered which require upgrading they are
// stored here with their replacement function.
typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
UpgradedIntrinsicMap UpgradedIntrinsics;
// Map the bitcode's custom MDKind ID to the Module's MDKind ID.
DenseMap<unsigned, unsigned> MDKindMap;
-
+
// Several operations happen after the module header has been read, but
// before function bodies are processed. This keeps track of whether
// we've done this yet.
bool SeenFirstFunctionBody;
-
+
/// DeferredFunctionInfo - When function bodies are initially scanned, this
/// map contains info about where to find deferred function body in the
/// stream.
DenseMap<Function*, uint64_t> DeferredFunctionInfo;
-
+
/// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These
/// are resolved lazily when functions are loaded.
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
@@ -208,11 +208,11 @@ public:
void materializeForwardReferencedFunctions();
void FreeState();
-
+
/// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
/// when the reader is destroyed.
void setBufferOwned(bool Owned) { BufferOwned = Owned; }
-
+
virtual bool isMaterializable(const GlobalValue *GV) const;
virtual bool isDematerializable(const GlobalValue *GV) const;
virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
@@ -224,7 +224,7 @@ public:
return true;
}
const char *getErrorString() const { return ErrorString; }
-
+
/// @brief Main interface to parsing a bitcode buffer.
/// @returns true if an error occurred.
bool ParseBitcodeInto(Module *M);
@@ -251,7 +251,7 @@ private:
return MAttributes[i-1];
return AttrListPtr();
}
-
+
/// getValueTypePair - Read a value/type pair out of the specified record from
/// slot 'Slot'. Increment Slot past the number of slots used in the record.
/// Return true on failure.
@@ -339,7 +339,7 @@ private:
bool FindFunctionInStream(Function *F,
DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
};
-
+
} // End llvm namespace
#endif
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 4288422463..9f51c35ad9 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -17,12 +17,11 @@ using namespace llvm;
int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
std::string ErrorInfo;
- raw_fd_ostream OS(Path, ErrorInfo,
- raw_fd_ostream::F_Binary);
-
+ raw_fd_ostream OS(Path, ErrorInfo, raw_fd_ostream::F_Binary);
+
if (!ErrorInfo.empty())
return -1;
-
+
WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
@@ -30,7 +29,7 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
int Unbuffered) {
raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
-
+
WriteBitcodeToFile(unwrap(M), OS);
return 0;
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 60c657ae6d..cf3c9fd74e 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -61,7 +61,7 @@ enum {
FUNCTION_INST_RET_VOID_ABBREV,
FUNCTION_INST_RET_VAL_ABBREV,
FUNCTION_INST_UNREACHABLE_ABBREV,
-
+
// SwitchInst Magic
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};
@@ -234,7 +234,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
-
+
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -256,16 +256,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
switch (T->getTypeID()) {
default: llvm_unreachable("Unknown type!");
- case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
- case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
- case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
- case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
- case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
- case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
- case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
- case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
- case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
case Type::IntegerTyID:
// INTEGER: [width]
Code = bitc::TYPE_CODE_INTEGER;
@@ -300,7 +300,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
for (StructType::element_iterator I = ST->element_begin(),
E = ST->element_end(); I != E; ++I)
TypeVals.push_back(VE.getTypeID(*I));
-
+
if (ST->isLiteral()) {
Code = bitc::TYPE_CODE_STRUCT_ANON;
AbbrevToUse = StructAnonAbbrev;
@@ -658,7 +658,7 @@ static void WriteFunctionLocalMetadata(const Function &F,
}
WriteMDNode(N, VE, Stream, Record);
}
-
+
if (StartedMetadataBlock)
Stream.ExitBlock();
}
@@ -673,18 +673,18 @@ static void WriteMetadataAttachment(const Function &F,
// Write metadata attachments
// METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
-
+
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
MDs.clear();
I->getAllMetadataOtherThanDebugLoc(MDs);
-
+
// If no metadata, ignore instruction.
if (MDs.empty()) continue;
Record.push_back(VE.getInstructionID(I));
-
+
for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
Record.push_back(MDs[i].first);
Record.push_back(VE.getValueID(MDs[i].second));
@@ -703,16 +703,16 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
// METADATA_KIND - [n x [id, name]]
SmallVector<StringRef, 4> Names;
M->getMDKindNames(Names);
-
+
if (Names.empty()) return;
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
-
+
for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
Record.push_back(MDKindID);
StringRef KName = Names[MDKindID];
Record.append(KName.begin(), KName.end());
-
+
Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
Record.clear();
}
@@ -743,10 +743,10 @@ static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals,
// format it is likely that the high bits are going to be zero.
// So, we only write the number of active words.
unsigned NWords = Val.getActiveWords();
-
+
if (EmitSizeForWideNumbers)
Vals.push_back(NWords);
-
+
const uint64_t *RawWords = Val.getRawData();
for (unsigned i = 0; i != NWords; ++i) {
emitSignedInt64(Vals, RawWords[i]);
@@ -881,12 +881,12 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
if (isCStrChar6)
isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
}
-
+
if (isCStrChar6)
AbbrevToUse = CString6Abbrev;
else if (isCStr7)
AbbrevToUse = CString7Abbrev;
- } else if (const ConstantDataSequential *CDS =
+ } else if (const ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(C)) {
Code = bitc::CST_CODE_DATA;
Type *EltTy = CDS->getType()->getElementType();
@@ -1179,13 +1179,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
// Redefine Vals, since here we need to use 64 bit values
// explicitly to store large APInt numbers.
SmallVector<uint64_t, 128> Vals64;
-
+
Code = bitc::FUNC_CODE_INST_SWITCH;
SwitchInst &SI = cast<SwitchInst>(I);
-
- uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16);
- Vals64.push_back(SwitchRecordHeader);
-
+
+ uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16);
+ Vals64.push_back(SwitchRecordHeader);
+
Vals64.push_back(VE.getTypeID(SI.getCondition()->getType()));
pushValue64(SI.getCondition(), InstID, Vals64, VE);
Vals64.push_back(VE.getValueID(SI.getDefaultDest()));
@@ -1194,21 +1194,21 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
i != e; ++i) {
IntegersSubset& CaseRanges = i.getCaseValueEx();
unsigned Code, Abbrev; // will unused.
-
+
if (CaseRanges.isSingleNumber()) {
Vals64.push_back(1/*NumItems = 1*/);
Vals64.push_back(true/*IsSingleNumber = true*/);
EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true);
} else {
-
+
Vals64.push_back(CaseRanges.getNumItems());
-
+
if (CaseRanges.isSingleNumbersOnly()) {
for (unsigned ri = 0, rn = CaseRanges.getNumItems();
ri != rn; ++ri) {
-
+
Vals64.push_back(true/*IsSingleNumber = true*/);
-
+
EmitAPInt(Vals64, Code, Abbrev,
CaseRanges.getSingleNumber(ri), true);
}
@@ -1217,9 +1217,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
ri != rn; ++ri) {
IntegersSubset::Range r = CaseRanges.getItem(ri);
bool IsSingleNumber = CaseRanges.isSingleNumber(ri);
-
+
Vals64.push_back(IsSingleNumber);
-
+
EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true);
if (!IsSingleNumber)
EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true);
@@ -1227,9 +1227,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
}
Vals64.push_back(VE.getValueID(i.getCaseSuccessor()));
}
-
+
Stream.EmitRecord(Code, Vals64, AbbrevToUse);
-
+
// Also do expected action - clear external Vals collection:
Vals.clear();
return;
@@ -1243,7 +1243,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i)
Vals.push_back(VE.getValueID(I.getOperand(i)));
break;
-
+
case Instruction::Invoke: {
const InvokeInst *II = cast<InvokeInst>(&I);
const Value *Callee(II->getCalledValue());
@@ -1502,21 +1502,21 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
unsigned InstID = CstEnd;
bool NeedsMetadataAttachment = false;
-
+
DebugLoc LastDL;
-
+
// Finally, emit all the instructions, in order.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
WriteInstruction(*I, InstID, VE, Stream, Vals);
-
+
if (!I->getType()->isVoidTy())
++InstID;
-
+
// If the instruction has metadata, write a metadata attachment later.
NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
-
+
// If the instruction has a debug location, emit it.
DebugLoc DL = I->getDebugLoc();
if (DL.isUnknown()) {
@@ -1527,14 +1527,14 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
} else {
MDNode *Scope, *IA;
DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
-
+
Vals.push_back(DL.getLine());
Vals.push_back(DL.getCol());
Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();
-
+
LastDL = DL;
}
}
@@ -1709,7 +1709,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
-// Sort the Users based on the order in which the reader parses the bitcode
+// Sort the Users based on the order in which the reader parses the bitcode
// file.
static bool bitcodereader_order(const User *lhs, const User *rhs) {
// TODO: Implement.
@@ -1778,9 +1778,9 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I)
I->removeDeadConstantUsers();
-
+
// Write the global variables.
- for (Module::const_global_iterator GI = M->global_begin(),
+ for (Module::const_global_iterator GI = M->global_begin(),
GE = M->global_end(); GI != GE; ++GI) {
WriteUseList(GI, VE, Stream);
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 91e115cba6..e5e76e29bd 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -22,9 +22,9 @@ namespace {
static char ID; // Pass identification, replacement for typeid
explicit WriteBitcodePass(raw_ostream &o)
: ModulePass(ID), OS(o) {}
-
+
const char *getPassName() const { return "Bitcode Writer"; }
-
+
bool runOnModule(Module &M) {
WriteBitcodeToFile(&M, OS);
return false;
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 1ed9004eb5..6c43f433b8 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -24,8 +24,8 @@
#include <algorithm>
using namespace llvm;
-static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
- return V.first->getType()->isIntegerTy();
+static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
+ return V.first->getType()->isIntOrIntVectorTy();
}
/// ValueEnumerator - Enumerate module-level information.
@@ -95,7 +95,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
I->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned i = 0, e = MDs.size(); i != e; ++i)
EnumerateMetadata(MDs[i].second);
-
+
if (!I->getDebugLoc().isUnknown()) {
MDNode *Scope, *IA;
I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
@@ -192,10 +192,11 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
CstSortPredicate P(*this);
std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
- // Ensure that integer constants are at the start of the constant pool. This
- // is important so that GEP structure indices come before gep constant exprs.
+ // Ensure that integer and vector of integer constants are at the start of the
+ // constant pool. This is important so that GEP structure indices come before
+ // gep constant exprs.
std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
- isIntegerValue);
+ isIntOrIntVectorValue);
// Rebuild the modified portion of ValueMap.
for (; CstStart != CstEnd; ++CstStart)
@@ -362,16 +363,16 @@ void ValueEnumerator::EnumerateType(Type *Ty) {
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isLiteral())
*TypeID = ~0U;
-
+
// Enumerate all of the subtypes before we enumerate this type. This ensures
// that the type will be enumerated in an order that can be directly built.
for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
I != E; ++I)
EnumerateType(*I);
-
+
// Refresh the TypeID pointer in case the table rehashed.
TypeID = &TypeMap[Ty];
-
+
// Check to see if we got the pointer another way. This can happen when
// enumerating recursive types that hit the base case deeper than they start.
//
@@ -379,10 +380,10 @@ void ValueEnumerator::EnumerateType(Type *Ty) {
// then emit the definition now that all of its contents are available.
if (*TypeID && *TypeID != ~0U)
return;
-
+
// Add this type now that its contents are all happily enumerated.
Types.push_back(Ty);
-
+
*TypeID = Types.size();
}
@@ -390,7 +391,7 @@ void ValueEnumerator::EnumerateType(Type *Ty) {
// walk through it, enumerating the types of the constant.
void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateType(V->getType());
-
+
if (const Constant *C = dyn_cast<Constant>(V)) {
// If this constant is already enumerated, ignore it, we know its type must
// be enumerated.
@@ -400,11 +401,11 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
// them.
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
const Value *Op = C->getOperand(i);
-
+
// Don't enumerate basic blocks here, this happens as operands to
// blockaddress.
if (isa<BasicBlock>(Op)) continue;
-
+
EnumerateOperandType(Op);
}
@@ -481,7 +482,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
if (N->isFunctionLocal() && N->getFunction())
FnLocalMDVector.push_back(N);
}
-
+
if (!I->getType()->isVoidTy())
EnumerateValue(I);
}
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 75468e6c5e..896fc3d0c8 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -51,15 +51,15 @@ private:
ValueList MDValues;
SmallVector<const MDNode *, 8> FunctionLocalMDs;
ValueMapType MDValueMap;
-
+
typedef DenseMap<void*, unsigned> AttributeMapType;
AttributeMapType AttributeMap;
std::vector<AttrListPtr> Attributes;
-
+
/// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
/// the "getGlobalBasicBlockID" method.
mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
-
+
typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
InstructionMapType InstructionMap;
unsigned InstructionCount;
@@ -67,7 +67,7 @@ private:
/// BasicBlocks - This contains all the basic blocks for the currently
/// incorporated function. Their reverse mapping is stored in ValueMap.
std::vector<const BasicBlock*> BasicBlocks;
-
+
/// When a function is incorporated, this is the size of the Values list
/// before incorporation.
unsigned NumModuleValues;
@@ -111,20 +111,20 @@ public:
Start = FirstFuncConstantID;
End = FirstInstID;
}
-
+
const ValueList &getValues() const { return Values; }
const ValueList &getMDValues() const { return MDValues; }
- const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+ const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
return FunctionLocalMDs;
}
const TypeList &getTypes() const { return Types; }
const std::vector<const BasicBlock*> &getBasicBlocks() const {
- return BasicBlocks;
+ return BasicBlocks;
}
const std::vector<AttrListPtr> &getAttributes() const {
return Attributes;
}
-
+
/// getGlobalBasicBlockID - This returns the function-specific ID for the
/// specified basic block. This is relatively expensive information, so it
/// should only be used by rare constructs such as address-of-label.
@@ -138,7 +138,7 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
-
+
void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Value *MD);
void EnumerateFunctionLocalMetadata(const MDNode *N);
@@ -147,7 +147,7 @@ private:
void EnumerateType(Type *T);
void EnumerateOperandType(const Value *V);
void EnumerateAttributes(const AttrListPtr &PAL);
-
+
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
void EnumerateNamedMetadata(const Module *M);
};
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index b2ebf04e51..dec80a43f6 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -69,24 +69,69 @@ void ARMException::EndFunction() {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
- // Emit references to personality.
- if (const Function * Personality =
- MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
- MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
- Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
- Asm->OutStreamer.EmitPersonality(PerSym);
- }
-
if (EnableARMEHABIDescriptors) {
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- Asm->OutStreamer.EmitHandlerData();
+ if (!MMI->getLandingPads().empty()) {
+ // Emit references to personality.
+ if (const Function * Personality =
+ MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+ MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+ Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+ Asm->OutStreamer.EmitPersonality(PerSym);
+ }
- // Emit actual exception table
- EmitExceptionTable();
+ // Emit .handlerdata directive.
+ Asm->OutStreamer.EmitHandlerData();
+
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
}
}
Asm->OutStreamer.EmitFnEnd();
}
+/// EmitTypeInfos - Emit TType references for catch TypeInfos, then filter ids.
+void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0; // Only used to number the verbose-asm comments below.
+ // Emit the catch TypeInfos back to front; labels count down from size().
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (std::vector<const GlobalVariable *>::const_reverse_iterator
+ I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+ const GlobalVariable *GV = *I;
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the exception specifications (filter ids); labels count down from -1.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
+ }
+ // TypeID 0 passes null; otherwise TypeID is a 1-based index into TypeInfos.
+ Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b4f0b174b5..89f278b54c 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -90,9 +90,6 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
return NumBits;
}
-
-
-
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
: MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()),
@@ -130,7 +127,6 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
return TM.getTargetLowering()->getObjFileLowering();
}
-
/// getDataLayout - Return information about data layout.
const DataLayout &AsmPrinter::getDataLayout() const {
return *TM.getDataLayout();
@@ -1671,7 +1667,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
}
return Byte;
}
-
+
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
return isRepeatedByteSequence(CDS);
@@ -1680,7 +1676,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
unsigned AddrSpace,AsmPrinter &AP){
-
+
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
@@ -1689,7 +1685,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
}
-
+
// If this can be emitted with .ascii/.asciz, emit it as such.
if (CDS->isString())
return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
@@ -1713,7 +1709,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
float F;
uint32_t I;
};
-
+
F = CDS->getElementAsFloat(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
@@ -1726,7 +1722,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
double F;
uint64_t I;
};
-
+
F = CDS->getElementAsDouble(i);
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
@@ -1935,7 +1931,7 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
-
+
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
return emitGlobalConstantArray(CVA, AddrSpace, AP);
@@ -1957,10 +1953,10 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return emitGlobalConstantImpl(New, AddrSpace, AP);
}
}
-
+
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return emitGlobalConstantVector(V, AddrSpace, AP);
-
+
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d94e1fe61b..b9aa5fc193 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -53,7 +53,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
void AsmPrinter::EmitCFAByte(unsigned Val) const {
if (isVerbose()) {
if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
- OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+ OutStreamer.AddComment("DW_CFA_offset + Reg (" +
Twine(Val-dwarf::DW_CFA_offset) + ")");
else
OutStreamer.AddComment(dwarf::CallFrameString(Val));
@@ -83,7 +83,7 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
return "indirect pcrel sdata8";
}
-
+
return "<unknown encoding>";
}
@@ -101,7 +101,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
OutStreamer.AddComment(Twine("Encoding = ") +
DecodeDWARFEncoding(Val));
}
-
+
OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
}
@@ -109,7 +109,7 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
if (Encoding == dwarf::DW_EH_PE_omit)
return 0;
-
+
switch (Encoding & 0x07) {
default: llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
@@ -119,20 +119,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
}
}
-void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
- const MCExpr *Exp =
- TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer);
- OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
-}
+void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) const {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
-
- const MCExpr *Exp =
- TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+ OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ } else
+ OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding), 0);
}
/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
@@ -149,22 +145,22 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
-
+
// Get the section that we're referring to, based on SectionLabel.
const MCSection &Section = SectionLabel->getSection();
-
+
// If Label has already been emitted, verify that it is in the same section as
// section label for sanity.
assert((!Label->isInSection() || &Label->getSection() == &Section) &&
"Section offset using wrong section base for label");
-
+
// If the section in question will end up with an address of 0 anyway, we can
// just emit an absolute reference to save a relocation.
if (Section.isBaseAddressKnownZero()) {
OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
return;
}
-
+
// Otherwise, emit it as a label difference from the start of the section.
EmitLabelDifference(Label, SectionLabel, 4);
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 2b07dda31f..4ca42283dd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -132,11 +132,11 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
// Verify variable.
if (!V.Verify())
return;
-
+
unsigned Line = V.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(),
V.getContext().getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -153,7 +153,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -171,7 +171,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(),
SP.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -188,7 +188,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
unsigned Line = Ty.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(),
Ty.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -206,7 +206,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
if (Line == 0)
return;
DIFile File = Ty.getFile();
- unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
+ unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
@@ -225,15 +225,15 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
return;
StringRef FN = NS.getFilename();
- unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory());
+ unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/// addVariableAddress - Add DW_AT_location attribute for a
+/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
-void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
MachineLocation Location) {
if (DV->variableHasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
@@ -492,7 +492,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
case 64: Form = dwarf::DW_FORM_data8; break;
default: break;
}
- SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
+ SignedConstant ? addSInt(Block, 0, Form, MO.getImm())
: addUInt(Block, 0, Form, MO.getImm());
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
@@ -535,7 +535,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
case 16: form = dwarf::DW_FORM_data2; break;
case 32: form = dwarf::DW_FORM_data4; break;
case 64: form = dwarf::DW_FORM_data8; break;
- default:
+ default:
form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata;
}
if (Unsigned)
@@ -635,7 +635,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
}
-
+
addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
@@ -670,8 +670,8 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
///
void CompileUnit::addGlobalType(DIType Ty) {
DIDescriptor Context = Ty.getContext();
- if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
- && (!Context || Context.isCompileUnit() || Context.isFile()
+ if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl()
+ && (!Context || Context.isCompileUnit() || Context.isFile()
|| Context.isNameSpace()))
if (DIEEntry *Entry = getDIEEntry(Ty))
GlobalTypes[Ty.getName()] = Entry->getEntry();
@@ -830,7 +830,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
else if (SP.isPrivate())
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
- else
+ else
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
@@ -878,7 +878,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (Property.isNonAtomicObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
if (PropertyAttributes)
- addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
PropertyAttributes);
DIEEntry *Entry = getDIEEntry(Element);
@@ -951,7 +951,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
}
-/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateTypeParameter.
DIE *
CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
@@ -965,7 +965,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
return ParamDIE;
}
-/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateValueParameter.
DIE *
CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
@@ -977,7 +977,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
addType(ParamDIE, TPV.getType());
if (!TPV.getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
- addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
TPV.getValue());
return ParamDIE;
}
@@ -1095,7 +1095,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (!SP.isDefinition()) {
addFlag(SPDie, dwarf::DW_AT_declaration);
-
+
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
DICompositeType SPTy = SP.getType();
@@ -1213,7 +1213,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (const ConstantInt *CI =
+ } else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
@@ -1226,7 +1226,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
Asm->Mang->getSymbol(cast<GlobalValue>(Ptr)));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
- addUInt(Block, 0, dwarf::DW_FORM_udata,
+ addUInt(Block, 0, dwarf::DW_FORM_udata,
Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
@@ -1255,7 +1255,7 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
// The L value defines the lower bounds which is typically zero for C/C++. The
// H value is the upper bounds. Values are 64 bit. H - L + 1 is the size
- // of the array. If L > H then do not emit DW_AT_lower_bound and
+ // of the array. If L > H then do not emit DW_AT_lower_bound and
// DW_AT_upper_bound attributes. If L is zero and H is also zero then the
// array has one element and in such case do not emit lower bound.
@@ -1376,20 +1376,20 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
- TFI->getFrameIndexReference(*Asm->MF,
- DVInsn->getOperand(1).getImm(),
+ int Offset =
+ TFI->getFrameIndexReference(*Asm->MF,
+ DVInsn->getOperand(1).getImm(),
FrameReg);
MachineLocation Location(FrameReg, Offset);
addVariableAddress(DV, VariableDie, Location);
-
+
} else if (RegOp.getReg())
- addVariableAddress(DV, VariableDie,
+ addVariableAddress(DV, VariableDie,
MachineLocation(RegOp.getReg()));
updated = true;
}
else if (DVInsn->getOperand(0).isImm())
- updated =
+ updated =
addConstantValue(VariableDie, DVInsn->getOperand(0),
DV->getType());
else if (DVInsn->getOperand(0).isFPImm())
@@ -1397,11 +1397,11 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
addConstantFPValue(VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isCImm())
updated =
- addConstantValue(VariableDie,
+ addConstantValue(VariableDie,
DVInsn->getOperand(0).getCImm(),
DV->getType().isUnsignedDIType());
} else {
- addVariableAddress(DV, VariableDie,
+ addVariableAddress(DV, VariableDie,
Asm->getDebugValueLocation(DVInsn));
updated = true;
}
@@ -1419,7 +1419,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
if (FI != ~0) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset =
+ int Offset =
TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
MachineLocation Location(FrameReg, Offset);
addVariableAddress(DV, VariableDie, Location);
@@ -1499,7 +1499,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
- else
+ else
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1509,7 +1509,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
// Objective-C properties.
if (MDNode *PNode = DT.getObjCProperty())
if (DIEEntry *PropertyDie = getDIEEntry(PNode))
- MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
PropertyDie);
// This is only for backward compatibility.
@@ -1536,7 +1536,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
if (DT.isNonAtomicObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
if (PropertyAttributes)
- addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
PropertyAttributes);
}
return MemberDie;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 22535fe5b4..3858cabb81 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -78,6 +78,15 @@ static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
clEnumValEnd),
cl::init(Default));
+static cl::opt<DefaultOnOff> DwarfFission("dwarf-fission", cl::Hidden,
+ cl::desc("Output prototype dwarf fission."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -102,19 +111,19 @@ DIType DbgVariable::getType() const {
either the struct, or a pointer to the struct, as its type. This
is necessary for various behind-the-scenes things the compiler
needs to do with by-reference variables in blocks.
-
+
However, as far as the original *programmer* is concerned, the
variable should still have type 'SomeType', as originally declared.
-
+
The following function dives into the __Block_byref_x_VarName
struct to find the original type of the variable. This will be
passed back to the code generating the type for the Debug
Information Entry for the variable 'VarName'. 'VarName' will then
have the original type 'SomeType' in its debug information.
-
+
The original type 'SomeType' will be the type of the field named
'VarName' inside the __Block_byref_x_VarName struct.
-
+
NOTE: In order for this to not completely fail on the debugger
side, the Debug Information Entry for the variable VarName needs to
have a DW_AT_location that tells the debugger how to unwind through
@@ -122,15 +131,15 @@ DIType DbgVariable::getType() const {
value of the variable. The function addBlockByrefType does this. */
DIType subType = Ty;
unsigned tag = Ty.getTag();
-
+
if (tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty);
subType = DTy.getTypeDerivedFrom();
}
-
+
DICompositeType blockStruct = DICompositeType(subType);
DIArray Elements = blockStruct.getTypeArray();
-
+
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
DIDerivedType DT = DIDerivedType(Element);
@@ -157,34 +166,39 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Turn on accelerator tables and older gdb compatibility
// for Darwin.
- bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
if (DarwinGDBCompat == Default) {
- if (isDarwin)
- isDarwinGDBCompat = true;
+ if (IsDarwin)
+ IsDarwinGDBCompat = true;
else
- isDarwinGDBCompat = false;
+ IsDarwinGDBCompat = false;
} else
- isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+ IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
if (DwarfAccelTables == Default) {
- if (isDarwin)
- hasDwarfAccelTables = true;
+ if (IsDarwin)
+ HasDwarfAccelTables = true;
else
- hasDwarfAccelTables = false;
+ HasDwarfAccelTables = false;
} else
- hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+ HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
+ if (DwarfFission == Default)
+ HasDwarfFission = false;
+ else
+ HasDwarfFission = DwarfFission == Enable ? true : false;
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- beginModule(M);
+ beginModule();
}
}
DwarfDebug::~DwarfDebug() {
}
-/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// emitSectionSym - Switch to the specified MCSection and emit an assembler
/// temporary label to it if SymbolStem is specified.
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
const char *SymbolStem = 0) {
Asm->OutStreamer.SwitchSection(Section);
if (!SymbolStem) return 0;
@@ -275,7 +289,7 @@ static StringRef getObjCMethodName(StringRef In) {
static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
DIE* Die) {
if (!SP.isDefinition()) return;
-
+
TheCU->addAccelName(SP.getName(), Die);
// If the linkage name is different than the name, go ahead and output
@@ -347,8 +361,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
}
DIE *SPDeclDie = SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPDeclDie);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, SPDeclDie);
SPCU->addDie(SPDie);
}
}
@@ -365,13 +379,13 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
addSubprogramNames(SPCU, SP, SPDie);
-
+
return SPDie;
}
/// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
@@ -387,7 +401,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size()
+ DebugRangeSymbols.size()
* Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
@@ -453,7 +467,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
- DebugRangeSymbols.size()
+ DebugRangeSymbols.size()
* Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
@@ -463,9 +477,9 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DebugRangeSymbols.push_back(NULL);
DebugRangeSymbols.push_back(NULL);
} else {
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
StartLabel);
- TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
EndLabel);
}
@@ -487,13 +501,13 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DILocation DL(Scope->getInlinedAt());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
- GetOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
-
+
return ScopeDIE;
}
@@ -509,7 +523,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (LScopes.isCurrentFunctionScope(Scope))
for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
if (DbgVariable *ArgDV = CurrentFnArguments[i])
- if (DIE *Arg =
+ if (DIE *Arg =
TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
Children.push_back(Arg);
if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
@@ -518,7 +532,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
// Collect lexical scope children first.
const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
for (unsigned i = 0, N = Variables.size(); i < N; ++i)
- if (DIE *Variable =
+ if (DIE *Variable =
TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
Children.push_back(Variable);
if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
@@ -548,7 +562,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
return NULL;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
}
-
+
if (!ScopeDIE) return NULL;
// Add children
@@ -566,16 +580,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
return ScopeDIE;
}
-/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// getOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
+unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
StringRef DirName,
StringRef Extra) { // @LOCALMOD
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
- return GetOrCreateSourceID("<stdin>", StringRef());
+ return getOrCreateSourceID("<stdin>", StringRef());
// TODO: this might not belong here. See if we can factor this better.
if (DirName == CompilationDir)
@@ -609,7 +623,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
// E.g., compile foo.c with -DMACRO1 to foo1.bc, then compile
// foo.c again with -DMACRO2 to foo2.bc and link.
// We use additional information to form a unique ID in that case.
-unsigned DwarfDebug::GetOrCreateCompileUnitID(StringRef Filename,
+unsigned DwarfDebug::getOrCreateCompileUnitID(StringRef Filename,
StringRef Dirname,
const MDNode *N) {
std::string DIUnitStr;
@@ -621,7 +635,7 @@ unsigned DwarfDebug::GetOrCreateCompileUnitID(StringRef Filename,
// Cheat and use the MDNode's address as an additional identifying factor.
// constructCompileUnit() is only called once per compile unit.
ostr << static_cast<const void*>(N);
- return GetOrCreateSourceID(Filename, Dirname, ostr.str());
+ return getOrCreateSourceID(Filename, Dirname, ostr.str());
}
// @LOCALMOD-END
@@ -632,7 +646,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
// @LOCALMOD
- unsigned ID = GetOrCreateCompileUnitID(FN, CompilationDir, N);
+ unsigned ID = getOrCreateCompileUnitID(FN, CompilationDir, N);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die,
@@ -660,7 +674,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
-
+
if (unsigned RVer = DIUnit.getRunTimeVersion())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
@@ -672,7 +686,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
}
/// constructSubprogramDIE - Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
+void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
const MDNode *N) {
CompileUnit *&CURef = SPMap[N];
if (CURef)
@@ -698,28 +712,28 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
/// as llvm.dbg.enum and llvm.dbg.ty
-void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
+void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit()))
constructSubprogramDIE(CU, N);
}
-
+
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
CU->createGlobalVariableDIE(N);
}
-
+
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIType Ty(NMD->getOperand(i));
if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit()))
CU->getOrCreateTypeDIE(Ty);
}
-
+
if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIType Ty(NMD->getOperand(i));
@@ -730,10 +744,10 @@ void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) {
/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
-bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
+bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
-
+
bool HasDebugInfo = false;
// Scan all the compile-units to see if there are any marked as the main
// unit. If not, we do not generate debug info.
@@ -745,12 +759,12 @@ bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
}
}
if (!HasDebugInfo) return false;
-
+
// Create all the compile unit DIEs.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
E = DbgFinder.compile_unit_end(); I != E; ++I)
constructCompileUnit(*I);
-
+
// Create DIEs for each global variable.
for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
E = DbgFinder.global_variable_end(); I != E; ++I) {
@@ -758,7 +772,7 @@ bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit()))
CU->createGlobalVariableDIE(N);
}
-
+
// Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I) {
@@ -773,10 +787,12 @@ bool DwarfDebug::collectLegacyDebugInfo(Module *M) {
/// beginModule - Emit all Dwarf sections that should come prior to the
/// content. Create global DIEs and emit initial debug info sections.
/// This is invoked by the target AsmPrinter.
-void DwarfDebug::beginModule(Module *M) {
+void DwarfDebug::beginModule() {
if (DisableDebugInfoPrinting)
return;
+ const Module *M = MMI->getModule();
+
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -801,80 +817,91 @@ void DwarfDebug::beginModule(Module *M) {
return;
collectInfoFromNamedMDNodes(M);
-
+
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
- // Emit initial sections.
- EmitSectionLabels();
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
}
-/// endModule - Emit all Dwarf sections that should come after the content.
-///
-void DwarfDebug::endModule() {
- if (!FirstCU) return;
+// Attach DW_AT_inline attribute with inlined subprogram DIEs.
+void DwarfDebug::computeInlinedDIEs() {
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+}
+
+// Collect info for variables that were optimized out.
+void DwarfDebug::collectDeadVariables() {
const Module *M = MMI->getModule();
DenseMap<const MDNode *, LexicalScope *> DeadFnScopeMap;
- // Collect info for variables that were optimized out.
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit TheCU(CU_Nodes->getOperand(i));
DIArray Subprograms = TheCU.getSubprograms();
for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
- DISubprogram SP(Subprograms.getElement(i));
- if (ProcessedSPNodes.count(SP) != 0) continue;
- if (!SP.Verify()) continue;
- if (!SP.isDefinition()) continue;
- DIArray Variables = SP.getVariables();
- if (Variables.getNumElements() == 0) continue;
-
- LexicalScope *Scope =
- new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
- DeadFnScopeMap[SP] = Scope;
-
- // Construct subprogram DIE and add variables DIEs.
- CompileUnit *SPCU = CUMap.lookup(TheCU);
- assert(SPCU && "Unable to find Compile Unit!");
- constructSubprogramDIE(SPCU, SP);
- DIE *ScopeDIE = SPCU->getDIE(SP);
- for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
- DIVariable DV(Variables.getElement(vi));
- if (!DV.Verify()) continue;
- DbgVariable *NewVar = new DbgVariable(DV, NULL);
- if (DIE *VariableDIE =
- SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
- ScopeDIE->addChild(VariableDIE);
- }
+ DISubprogram SP(Subprograms.getElement(i));
+ if (ProcessedSPNodes.count(SP) != 0) continue;
+ if (!SP.Verify()) continue;
+ if (!SP.isDefinition()) continue;
+ DIArray Variables = SP.getVariables();
+ if (Variables.getNumElements() == 0) continue;
+
+ LexicalScope *Scope =
+ new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+ DeadFnScopeMap[SP] = Scope;
+
+ // Construct subprogram DIE and add variables DIEs.
+ CompileUnit *SPCU = CUMap.lookup(TheCU);
+ assert(SPCU && "Unable to find Compile Unit!");
+ constructSubprogramDIE(SPCU, SP);
+ DIE *ScopeDIE = SPCU->getDIE(SP);
+ for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+ DIVariable DV(Variables.getElement(vi));
+ if (!DV.Verify()) continue;
+ DbgVariable *NewVar = new DbgVariable(DV, NULL);
+ if (DIE *VariableDIE =
+ SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+ ScopeDIE->addChild(VariableDIE);
+ }
}
}
}
+ DeleteContainerSeconds(DeadFnScopeMap);
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+ // Collect info for variables that were optimized out.
+ collectDeadVariables();
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
- for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
- AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
- DIE *ISP = *AI;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
- }
- for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
- AE = AbstractSPDies.end(); AI != AE; ++AI) {
- DIE *ISP = AI->second;
- if (InlinedSubprogramDIEs.count(ISP))
- continue;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
- }
+ computeInlinedDIEs();
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
- CUE = CUMap.end(); CUI != CUE; ++CUI) {
+ CUE = CUMap.end(); CUI != CUE; ++CUI) {
CompileUnit *TheCU = CUI->second;
TheCU->constructContainingTypeDIEs();
}
+ // Compute DIE offsets and sizes.
+ computeSizeAndOffsets();
+}
+
+void DwarfDebug::endSections() {
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
@@ -886,9 +913,23 @@ void DwarfDebug::endModule() {
Asm->OutStreamer.SwitchSection(SectionMap[I]);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
}
+}
- // Compute DIE offsets and sizes.
- computeSizeAndOffsets();
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+
+ if (!FirstCU) return;
+
+ // End any existing sections.
+ // TODO: Does this need to happen?
+ endSections();
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ // Emit initial sections.
+ emitSectionLabels();
// Emit all the DIEs into a debug info section
emitDebugInfo();
@@ -903,7 +944,7 @@ void DwarfDebug::endModule() {
emitAccelNamespaces();
emitAccelTypes();
}
-
+
// Emit info into a debug pubtypes section.
// TODO: When we don't need the option anymore we can
// remove all of the code that adds to the table.
@@ -914,7 +955,7 @@ void DwarfDebug::endModule() {
emitDebugLoc();
// Emit info into a debug aranges section.
- EmitDebugARanges();
+ emitDebugARanges();
// Emit info into a debug ranges section.
emitDebugRanges();
@@ -933,7 +974,6 @@ void DwarfDebug::endModule() {
emitDebugStr();
// clean up.
- DeleteContainerSeconds(DeadFnScopeMap);
SPMap.clear();
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I)
@@ -971,7 +1011,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
if (DV.getTag() != dwarf::DW_TAG_arg_variable)
return false;
unsigned ArgNo = DV.getArgNumber();
- if (ArgNo == 0)
+ if (ArgNo == 0)
return false;
size_t Size = CurrentFnArguments.size();
@@ -1026,8 +1066,8 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
/// at MI.
-static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
- const MCSymbol *FLabel,
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
const MCSymbol *SLabel,
const MachineInstr *MI) {
const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
@@ -1131,7 +1171,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SLabel = FunctionEndSym;
else {
const MachineInstr *End = HI[1];
- DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
<< "\t" << *Begin << "\t" << *End << "\n");
if (End->isDebugValue())
SLabel = getLabelBeforeInsn(End);
@@ -1355,7 +1395,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (History.size() >= 2 &&
Prev->isIdenticalTo(History[History.size() - 2])) {
DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << *Prev
+ << "\t" << *Prev
<< "\t" << *History[History.size() - 2] << "\n");
History.pop_back();
}
@@ -1441,7 +1481,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *Prev = History.back();
if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
const MachineBasicBlock *PrevMBB = Prev->getParent();
- MachineBasicBlock::const_iterator LastMI =
+ MachineBasicBlock::const_iterator LastMI =
PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end())
// Drop DBG_VALUE for empty range.
@@ -1490,10 +1530,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
-
+
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
@@ -1523,9 +1563,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
constructScopeDIE(TheCU, AScope);
}
-
+
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
-
+
if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
@@ -1580,7 +1620,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
} else
llvm_unreachable("Unexpected scope info");
- Src = GetOrCreateSourceID(Fn, Dir);
+ Src = getOrCreateSourceID(Fn, Dir);
}
Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
}
@@ -1592,7 +1632,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
/// computeSizeAndOffset - Compute the size and offset of a DIE.
///
unsigned
-DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
@@ -1623,7 +1663,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
"Children flag not set");
for (unsigned j = 0, M = Children.size(); j < M; ++j)
- Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+ Offset = computeSizeAndOffset(Children[j], Offset);
// End of children marker.
Offset += sizeof(int8_t);
@@ -1639,43 +1679,43 @@ void DwarfDebug::computeSizeAndOffsets() {
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
// Compute size of compile unit header.
- unsigned Offset =
+ unsigned Offset =
sizeof(int32_t) + // Length of Compilation Unit Info
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t); // Pointer Size (in bytes)
- computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+ computeSizeAndOffset(I->second->getCUDie(), Offset);
}
}
-/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// emitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
-void DwarfDebug::EmitSectionLabels() {
+void DwarfDebug::emitSectionLabels() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
// Dwarf sections base addresses.
DwarfInfoSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+ emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
DwarfAbbrevSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
- EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+ emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+ emitSectionSym(Asm, TLOF.getDwarfARangesSection());
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
- EmitSectionSym(Asm, MacroInfo);
+ emitSectionSym(Asm, MacroInfo);
- EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
- EmitSectionSym(Asm, TLOF.getDwarfLocSection());
- EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+ emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ emitSectionSym(Asm, TLOF.getDwarfLocSection());
+ emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
- DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+ emitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+ DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
- DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
"section_debug_loc");
- TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
- EmitSectionSym(Asm, TLOF.getDataSection());
+ TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+ emitSectionSym(Asm, TLOF.getDataSection());
}
/// emitDIE - Recursively emits a debug information entry.
@@ -1806,7 +1846,7 @@ void DwarfDebug::emitDebugInfo() {
/// emitAbbreviations - Emit the abbreviation section.
///
-void DwarfDebug::emitAbbreviations() const {
+void DwarfDebug::emitAbbreviations() {
// Check to see if it is worth the effort.
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
@@ -2103,7 +2143,7 @@ void DwarfDebug::emitDebugLoc() {
if (Entry.isInt()) {
DIBasicType BTy(DV.getType());
if (BTy.Verify() &&
- (BTy.getEncoding() == dwarf::DW_ATE_signed
+ (BTy.getEncoding() == dwarf::DW_ATE_signed
|| BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
Asm->OutStreamer.AddComment("DW_OP_consts");
Asm->EmitInt8(dwarf::DW_OP_consts);
@@ -2114,7 +2154,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->EmitULEB128(Entry.getInt());
}
} else if (Entry.isLocation()) {
- if (!DV.hasComplexAddress())
+ if (!DV.hasComplexAddress())
// Regular entry.
Asm->EmitDwarfRegOp(Entry.Loc);
else {
@@ -2140,7 +2180,7 @@ void DwarfDebug::emitDebugLoc() {
} else {
Asm->EmitDwarfRegOp(Entry.Loc);
}
-
+
// Emit remaining complex address elements.
for (; i < N; ++i) {
uint64_t Element = DV.getAddrElement(i);
@@ -2162,9 +2202,9 @@ void DwarfDebug::emitDebugLoc() {
}
}
-/// EmitDebugARanges - Emit visible names into a debug aranges section.
+/// emitDebugARanges - Emit visible names into a debug aranges section.
///
-void DwarfDebug::EmitDebugARanges() {
+void DwarfDebug::emitDebugARanges() {
// Start the dwarf aranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 475c6f86d9..3394483010 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -83,22 +83,22 @@ typedef struct DotDebugLocEntry {
const ConstantFP *CFP;
const ConstantInt *CIP;
} Constants;
- DotDebugLocEntry()
- : Begin(0), End(0), Variable(0), Merged(false),
+ DotDebugLocEntry()
+ : Begin(0), End(0), Variable(0), Merged(false),
Constant(false) { Constants.Int = 0;}
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
- const MDNode *V)
- : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
+ const MDNode *V)
+ : Begin(B), End(E), Loc(L), Variable(V), Merged(false),
Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
const ConstantInt *IPtr)
- : Begin(B), End(E), Variable(0), Merged(false),
+ : Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
/// Empty entries are also used as a trigger to emit temp label. Such
@@ -132,7 +132,7 @@ class DbgVariable {
int FrameIndex;
public:
// AbsVar may be NULL.
- DbgVariable(DIVariable V, DbgVariable *AV)
+ DbgVariable(DIVariable V, DbgVariable *AV)
: Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
FrameIndex(~0) {}
@@ -148,11 +148,11 @@ public:
void setMInsn(const MachineInstr *M) { MInsn = M; }
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
- // Translate tag to proper Dwarf tag.
- unsigned getTag() const {
+ // Translate tag to proper Dwarf tag.
+ unsigned getTag() const {
if (Var.getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
-
+
return dwarf::DW_TAG_variable;
}
/// isArtificial - Return true if DbgVariable is artificial.
@@ -171,7 +171,7 @@ public:
return true;
return false;
}
-
+
bool variableHasComplexAddress() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.hasComplexAddress();
@@ -180,7 +180,7 @@ public:
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.isBlockByrefVariable();
}
- unsigned getNumAddrElements() const {
+ unsigned getNumAddrElements() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.getNumAddrElements();
}
@@ -228,7 +228,7 @@ class DwarfDebug {
/// references.
StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
unsigned NextStringPoolNumber;
-
+
/// SectionMap - Provides a unique id per text section.
///
SetVector<const MCSection*> SectionMap;
@@ -264,7 +264,7 @@ class DwarfDebug {
// are processed to create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
- /// LabelsBeforeInsn - Maps instruction with label emitted before
+ /// LabelsBeforeInsn - Maps instruction with label emitted before
/// instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
@@ -316,9 +316,11 @@ class DwarfDebug {
// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
- // A holder for the DarwinGDBCompat flag so that the compile unit can use it.
- bool isDarwinGDBCompat;
- bool hasDwarfAccelTables;
+ // Holders for the various debug information flags that we might need to
+ // have exposed. See accessor functions below for description.
+ bool IsDarwinGDBCompat;
+ bool HasDwarfAccelTables;
+ bool HasDwarfFission;
private:
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -330,13 +332,13 @@ private:
/// findAbstractVariable - Find abstract variable associated with Var.
DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
- /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+ /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
/// DIEs for these variables.
DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
- /// constructLexicalScope - Construct new DW_TAG_lexical_block
+ /// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
@@ -350,27 +352,43 @@ private:
/// emitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
- void EmitSectionLabels();
+ void emitSectionLabels();
/// emitDIE - Recursively Emits a debug information entry.
///
void emitDIE(DIE *Die);
- /// computeSizeAndOffset - Compute the size and offset of a DIE.
+ /// computeSizeAndOffset - Compute the size and offset of a DIE given
+ /// an incoming Offset.
///
- unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
+ unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
///
void computeSizeAndOffsets();
+ /// computeInlinedDIEs - Attach DW_AT_inline attribute with inlined
+ /// subprogram DIEs.
+ void computeInlinedDIEs();
+
+ /// collectDeadVariables - Collect info for variables that were optimized out.
+ void collectDeadVariables();
+
+ /// finalizeModuleInfo - Finish off debug information after all functions
+ /// have been processed.
+ void finalizeModuleInfo();
+
+ /// endSections - Emit labels to close any remaining sections that have
+ /// been left open.
+ void endSections();
+
/// emitDebugInfo - Emit the debug info section.
///
void emitDebugInfo();
/// emitAbbreviations - Emit the abbreviation section.
///
- void emitAbbreviations() const;
+ void emitAbbreviations();
/// emitEndOfLineMatrix - Emit the last address of the section and the end of
/// the line matrix.
@@ -380,7 +398,7 @@ private:
/// emitAccelNames - Emit visible names into a hashed accelerator table
/// section.
void emitAccelNames();
-
+
/// emitAccelObjC - Emit objective C classes and categories into a hashed
/// accelerator table section.
void emitAccelObjC();
@@ -392,7 +410,7 @@ private:
/// emitAccelTypes() - Emit type dies into a hashed accelerator table.
///
void emitAccelTypes();
-
+
/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
///
void emitDebugPubTypes();
@@ -407,7 +425,7 @@ private:
/// emitDebugARanges - Emit visible names into a debug aranges section.
///
- void EmitDebugARanges();
+ void emitDebugARanges();
/// emitDebugRanges - Emit visible names into a debug ranges section.
///
@@ -425,19 +443,19 @@ private:
///
/// Entries (one "entry" for each function that was inlined):
///
- /// 1. offset into __debug_str section for MIPS linkage name, if exists;
+ /// 1. offset into __debug_str section for MIPS linkage name, if exists;
/// otherwise offset into __debug_str for regular function name.
/// 2. offset into __debug_str section for regular function name.
- /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+ /// 3. an unsigned LEB128 number indicating the number of distinct inlining
/// instances for the function.
- ///
- /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ ///
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
/// inlined instance; the die_offset points to the inlined_subroutine die in
/// the __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
void emitDebugInlineInfo();
- /// constructCompileUnit - Create new CompileUnit for the given
+ /// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
CompileUnit *constructCompileUnit(const MDNode *N);
@@ -449,7 +467,7 @@ private:
/// the source line list.
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
-
+
/// identifyScopeMarkers() - Identify instructions that are marking the
/// beginning of or ending of a scope.
void identifyScopeMarkers();
@@ -462,7 +480,7 @@ private:
/// collectVariableInfo - Populate LexicalScope entries with variables' info.
void collectVariableInfo(const MachineFunction *,
SmallPtrSet<const MDNode *, 16> &ProcessedVars);
-
+
/// collectVariableInfoFromMMITable - Collect variable information from
/// side table maintained by MMI.
void collectVariableInfoFromMMITable(const MachineFunction * MF,
@@ -493,15 +511,15 @@ public:
/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
/// as llvm.dbg.enum and llvm.dbg.ty
- void collectInfoFromNamedMDNodes(Module *M);
+ void collectInfoFromNamedMDNodes(const Module *M);
/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
/// FIXME - Remove this when DragonEgg switches to DIBuilder.
- bool collectLegacyDebugInfo(Module *M);
+ bool collectLegacyDebugInfo(const Module *M);
/// beginModule - Emit all Dwarf sections that should come prior to the
/// content.
- void beginModule(Module *M);
+ void beginModule();
/// endModule - Emit all Dwarf sections that should come after the content.
///
@@ -521,16 +539,16 @@ public:
/// endInstruction - Process end of an instruction.
void endInstruction(const MachineInstr *MI);
- /// GetOrCreateSourceID - Look up the source id with the given directory and
+ /// getOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map.
- unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName,
+ unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
StringRef Extra = ""); // @LOCALMOD for Extra
// @LOCALMOD-BEGIN - Create an ID for CompileUnits, taking extra care
// in the case that we have multiple compile units coming from the
// same source file and directory.
- unsigned GetOrCreateCompileUnitID(StringRef FileName, StringRef DirName,
+ unsigned getOrCreateCompileUnitID(StringRef FileName, StringRef DirName,
const MDNode *N);
// @LOCALMOD-END
@@ -544,8 +562,17 @@ public:
/// useDarwinGDBCompat - returns whether or not to limit some of our debug
/// output to the limitations of darwin gdb.
- bool useDarwinGDBCompat() { return isDarwinGDBCompat; }
- bool useDwarfAccelTables() { return hasDwarfAccelTables; }
+ bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
+
+ // Experimental DWARF5 features.
+
+ /// useDwarfAccelTables - returns whether or not to emit tables that
+ /// dwarf consumers can use to accelerate lookup.
+ bool useDwarfAccelTables() { return HasDwarfAccelTables; }
+
+ /// useDwarfFission - returns whether or not to change the current debug
+ /// info for the fission proposal support.
+ bool useDwarfFission() { return HasDwarfFission; }
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 08fb6b3f52..0bcb1b5cc8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -672,6 +672,18 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitSLEB128(Action.NextAction);
}
+ EmitTypeInfos(TTypeEncoding);
+
+ Asm->EmitAlignment(2);
+}
+
+void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
+ int Entry = 0;
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
@@ -684,11 +696,7 @@ void DwarfException::EmitExceptionTable() {
const GlobalVariable *GV = *I;
if (VerboseAsm)
Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
- if (GV)
- Asm->EmitReference(GV, TTypeEncoding);
- else
- Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
- 0);
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
}
// Emit the Exception Specifications.
@@ -708,8 +716,6 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitULEB128(TypeID);
}
-
- Asm->EmitAlignment(2);
}
/// EndModule - Emit all exception information that should come after the
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index fe9e493609..74b1b13367 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -121,6 +121,8 @@ protected:
/// catches in the function. This table is reverse indexed, base 1.
void EmitExceptionTable();
+ virtual void EmitTypeInfos(unsigned TTypeEncoding);
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -175,6 +177,7 @@ public:
};
class ARMException : public DwarfException {
+ void EmitTypeInfos(unsigned TTypeEncoding);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 6ae07dfb0b..b1460ed107 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -75,7 +75,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Formal argument #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -107,7 +107,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Return operand #" << i << " has unhandled type "
- << EVT(VT).getEVTString();
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -125,7 +125,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -144,7 +144,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString();
+ << EVT(ArgVT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -161,7 +161,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << "\n";
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
@@ -174,7 +174,7 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << EVT(VT).getEVTString();
+ << EVT(VT).getEVTString() << '\n';
#endif
llvm_unreachable(0);
}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 24daafaa62..91ec038725 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -191,7 +191,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
STI, *Context);
- MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU);
+ MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(),
+ TargetCPU);
if (MCE == 0 || MAB == 0)
return true;
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index c3bf2d234c..8585cbb30d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -59,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
return VNI;
}
if (SlotIndex::isSameInstr(Def, I->start)) {
- assert(I->start == Def && "Cannot insert def, already live");
- assert(I->valno->def == Def && "Inconsistent existing value def");
+ assert(I->valno->def == I->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, I->start);
+ if (Def != I->start)
+ I->start = I->valno->def = Def;
return I->valno;
}
assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 34b24b6085..667c7613d7 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -983,7 +983,6 @@ MachineBasicBlock::LivenessQueryResult
MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
unsigned Reg, MachineInstr *MI,
unsigned Neighborhood) {
-
unsigned N = Neighborhood;
MachineBasicBlock *MBB = MI->getParent();
@@ -998,14 +997,18 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
MachineOperandIteratorBase::PhysRegInfo Analysis =
MIOperands(I).analyzePhysReg(Reg, TRI);
- if (Analysis.Kills)
+ if (Analysis.Defines)
+ // Outputs happen after inputs so they take precedence if both are
+ // present.
+ return Analysis.DefinesDead ? LQR_Dead : LQR_Live;
+
+ if (Analysis.Kills || Analysis.Clobbers)
// Register killed, so isn't live.
return LQR_Dead;
- else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap)
+ else if (Analysis.ReadsOverlap)
// Read (directly or via an overlapping register) without a previous kill - live.
- return (Analysis.Defines || Analysis.Reads) ?
- LQR_Live : LQR_OverlappingLive;
+ return Analysis.Reads ? LQR_Live : LQR_OverlappingLive;
} while (I != MBB->begin() && --N > 0);
}
@@ -1037,7 +1040,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
return (Analysis.Reads) ?
LQR_Live : LQR_OverlappingLive;
- else if (Analysis.DefinesOverlap)
+ else if (Analysis.Clobbers || Analysis.Defines)
// Defined (but not read) therefore cannot have been live.
return LQR_Dead;
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 0f260205df..dbc41defeb 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -84,7 +84,8 @@ namespace {
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
- SmallVector<unsigned,2> &PhysDefs) const;
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
SmallSet<unsigned,8> &PhysRefs,
SmallVector<unsigned,2> &PhysDefs,
@@ -194,31 +195,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
- SmallVector<unsigned,2> &PhysDefs) const{
- MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const{
+ // First, add all uses to PhysRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
+ if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- // If the def is dead, it's ok. But the def may not marked "dead". That's
- // common since this pass is run before livevariables. We can scan
- // forward a few instructions and check if it is obviously dead.
- if (MO.isDef() &&
- (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
- continue;
// Reading constant physregs is ok.
if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
- if (MO.isDef())
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
PhysDefs.push_back(Reg);
}
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+
return !PhysRefs.empty();
}
@@ -407,8 +429,8 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
assert(SI != ScopeMap.end());
- ScopeMap.erase(SI);
delete SI->second;
+ ScopeMap.erase(SI);
}
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
@@ -459,16 +481,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool CrossMBBPhysDef = false;
SmallSet<unsigned, 8> PhysRefs;
SmallVector<unsigned, 2> PhysDefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
+ bool PhysUseDef = false;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+ PhysDefs, PhysUseDef)) {
FoundCSE = false;
// ... Unless the CS is local or is in the sole predecessor block
// and it also defines the physical register which is not clobbered
// in between and the physical register uses were not clobbered.
- unsigned CSVN = VNT.lookup(MI);
- MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
- FoundCSE = true;
+ // This can never be the case if the instruction both uses and
+ // defines the same physical register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
}
if (!FoundCSE) {
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 1f7fbfc719..70f97dedaa 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -281,7 +281,7 @@ MachineOperandIteratorBase::PhysRegInfo
MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
- PhysRegInfo PRI = {false, false, false, false, false, false, false};
+ PhysRegInfo PRI = {false, false, false, false, false, false};
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
@@ -305,7 +305,9 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
// Reg or a super-reg is read, and perhaps killed also.
PRI.Reads = true;
PRI.Kills = MO.isKill();
- } if (IsRegOrOverlapping && MO.readsReg()) {
+ }
+
+ if (IsRegOrOverlapping && MO.readsReg()) {
PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 95d7a7dd68..34518fa46b 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -305,6 +305,8 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
#endif
void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ assert (!reservedRegsFrozen() &&
+ "freezeReservedRegs should only be called once!");
ReservedRegs = TRI->getReservedRegs(MF);
assert(ReservedRegs.size() == TRI->getNumRegs() &&
"Invalid ReservedRegs vector from target");
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index a4817d09c0..8d43360e67 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -58,6 +58,14 @@ static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
"before attempting to balance ILP"),
cl::init(10U));
+// Experimental heuristics
+static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable load clustering."), cl::init(true));
+
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
@@ -303,6 +311,19 @@ void ReadyQueue::dump() {
// preservation.
//===----------------------------------------------------------------------===//
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPred(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
+
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
@@ -310,6 +331,12 @@ void ReadyQueue::dump() {
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -338,6 +365,12 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -474,6 +507,8 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
void ScheduleDAGMI::schedule() {
buildDAGWithRegPressure();
+ Topo.InitDAGTopologicalSorting();
+
postprocessDAG();
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
@@ -530,17 +565,20 @@ void ScheduleDAGMI::postprocessDAG() {
}
// Release all DAG roots for scheduling.
+//
+// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
SmallVector<SUnit*, 16> BotRoots;
for (std::vector<SUnit>::iterator
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ SUnit *SU = &(*I);
// A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
+ if (!I->NumPredsLeft && SU != &EntrySU)
+ SchedImpl->releaseTopNode(SU);
// A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- BotRoots.push_back(&(*I));
+ if (!I->NumSuccsLeft && SU != &ExitSU)
+ BotRoots.push_back(SU);
}
// Release bottom roots in reverse order so the higher priority nodes appear
// first. This is more natural and slightly more efficient.
@@ -551,17 +589,18 @@ void ScheduleDAGMI::releaseRoots() {
/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues() {
+ NextClusterSucc = NULL;
+ NextClusterPred = NULL;
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
- // Release edges from the special Entry node or to the special Exit node.
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ releaseRoots();
+
releaseSuccessors(&EntrySU);
releasePredecessors(&ExitSU);
- // Release all DAG roots for scheduling.
- releaseRoots();
-
SchedImpl->registerRoots();
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
@@ -655,6 +694,166 @@ void ScheduleDAGMI::dumpSchedule() const {
#endif
//===----------------------------------------------------------------------===//
+// LoadClusterMutation - DAG post-processing to cluster loads.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads.
+class LoadClusterMutation : public ScheduleDAGMutation {
+ struct LoadInfo {
+ SUnit *SU;
+ unsigned BaseReg;
+ unsigned Offset;
+ LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
+ };
+ static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : TII(tii), TRI(tri) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+protected:
+ void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+bool LoadClusterMutation::LoadInfoLess(
+ const LoadClusterMutation::LoadInfo &LHS,
+ const LoadClusterMutation::LoadInfo &RHS) {
+ if (LHS.BaseReg != RHS.BaseReg)
+ return LHS.BaseReg < RHS.BaseReg;
+ return LHS.Offset < RHS.Offset;
+}
+
+void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
+ ScheduleDAGMI *DAG) {
+ SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
+ for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
+ SUnit *SU = Loads[Idx];
+ unsigned BaseReg;
+ unsigned Offset;
+ if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ }
+ if (LoadRecords.size() < 2)
+ return;
+ std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+ unsigned ClusterLength = 1;
+ for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
+ if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ ClusterLength = 1;
+ continue;
+ }
+
+ SUnit *SUa = LoadRecords[Idx].SU;
+ SUnit *SUb = LoadRecords[Idx+1].SU;
+ if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
+ && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+
+ DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since nearby
+ // loads should have effectively the same inputs.
+ for (SUnit::const_succ_iterator
+ SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+ if (SI->getSUnit() == SUb)
+ continue;
+ DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ ++ClusterLength;
+ }
+ else
+ ClusterLength = 1;
+ }
+}
+
+/// \brief Callback from DAG postProcessing to create cluster edges for loads.
+void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+ // Map DAG NodeNum to store chain ID.
+ DenseMap<unsigned, unsigned> StoreChainIDs;
+ // Map each store chain to a set of dependent loads.
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->mayLoad())
+ continue;
+ unsigned ChainPredID = DAG->SUnits.size();
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->isCtrl()) {
+ ChainPredID = PI->getSUnit()->NodeNum;
+ break;
+ }
+ }
+ // Check if this chain-like pred has been seen
+ // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ unsigned NumChains = StoreChainDependents.size();
+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+ if (Result.second)
+ StoreChainDependents.resize(NumChains + 1);
+ StoreChainDependents[Result.first->second].push_back(SU);
+ }
+ // Iterate over the store chains.
+ for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+ clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ const TargetInstrInfo *TII;
+public:
+ MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGMI *DAG) {
+ // For now, assume targets can only fuse with the branch.
+ MachineInstr *Branch = DAG->ExitSU.getInstr();
+ if (!Branch)
+ return;
+
+ for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
+ SUnit *SU = &DAG->SUnits[--Idx];
+ if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//
@@ -666,9 +865,10 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
- NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
- BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
- SingleMax, MultiPressure, NextDefUse, NodeOrder};
+ NoCand, SingleExcess, SingleCritical, Cluster,
+ ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
+ TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
+ NodeOrder};
#ifndef NDEBUG
static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
@@ -1019,6 +1219,8 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
+ if (I->isWeak())
+ continue;
unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
@@ -1414,6 +1616,7 @@ static bool tryLess(unsigned TryVal, unsigned CandVal,
}
return false;
}
+
static bool tryGreater(unsigned TryVal, unsigned CandVal,
ConvergingScheduler::SchedCandidate &TryCand,
ConvergingScheduler::SchedCandidate &Cand,
@@ -1430,6 +1633,10 @@ static bool tryGreater(unsigned TryVal, unsigned CandVal,
return false;
}
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -1472,6 +1679,26 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
if (Cand.Reason == SingleCritical)
Cand.Reason = MultiPressure;
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *NextClusterSU =
+ Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+ // Currently, weak edges are for clustering, so we hard-code that reason.
+ // However, deferring the current TryCand will not change Cand's reason.
+ CandReason OrigReason = Cand.Reason;
+ if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+ getWeakLeft(Cand.SU, Zone.isTop()),
+ TryCand, Cand, Cluster)) {
+ Cand.Reason = OrigReason;
+ return;
+ }
// Avoid critical resource consumption and balance the schedule.
TryCand.initResourceDelta(DAG, SchedModel);
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
@@ -1518,15 +1745,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
// Prefer immediate defs/users of the last scheduled instruction. This is a
// nice pressure avoidance strategy that also conserves the processor's
// register renaming resources and keeps the machine code readable.
- if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
- TryCand.Reason = NextDefUse;
+ if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+ TryCand, Cand, NextDefUse))
return;
- }
- if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
- if (Cand.Reason > NextDefUse)
- Cand.Reason = NextDefUse;
- return;
- }
+
// Fall through to original instruction order.
if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
@@ -1572,6 +1794,7 @@ const char *ConvergingScheduler::getReasonStr(
case NoCand: return "NOCAND ";
case SingleExcess: return "REG-EXCESS";
case SingleCritical: return "REG-CRIT ";
+ case Cluster: return "CLUSTER ";
case SingleMax: return "REG-MAX ";
case MultiPressure: return "REG-MULTI ";
case ResourceReduce: return "RES-REDUCE";
@@ -1812,7 +2035,13 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
assert((!ForceTopDown || !ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMI(C, new ConvergingScheduler());
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
+ // Register DAG post-processors.
+ if (EnableLoadCluster)
+ DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+ if (EnableMacroFusion)
+ DAG->addMutation(new MacroFusion(DAG->TII));
+ return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 7c7d2c8045..7728cb4d4e 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/CommandLine.h"
@@ -241,7 +242,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
disablePass(&EarlyIfConverterID);
// Temporarily disable experimental passes.
- substitutePass(&MachineSchedulerID, 0);
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enableMachineScheduler())
+ disablePass(&MachineSchedulerID);
}
/// Insert InsertedPassID pass after TargetPassID.
@@ -472,8 +475,7 @@ void TargetPassConfig::addMachinePasses() {
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
- }
- else {
+ } else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
addPass(&LocalStackSlotAllocationID);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index d57bc7362d..f37fc82b2a 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -111,9 +111,6 @@ namespace {
/// added to the AvailableQueue.
std::vector<SUnit*> PendingQueue;
- /// Topo - A topological ordering for SUnits.
- ScheduleDAGTopologicalSort Topo;
-
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
@@ -198,7 +195,7 @@ SchedulePostRATDList::SchedulePostRATDList(
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
@@ -580,10 +577,14 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+/// the PendingQueue if the count reaches zero.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
@@ -653,8 +654,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
// Add all leaves to Available queue.
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
// It is available if it has no predecessors.
- bool available = SUnits[i].Preds.empty();
- if (available) {
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
AvailableQueue.push(&SUnits[i]);
SUnits[i].isAvailable = true;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 77554d691c..36c1ae7f72 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -133,19 +133,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-#if 0
-void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- if (ShrinkWrapping || ShrinkWrapFunc != "") {
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<MachineDominatorTree>();
- }
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-#endif
-
/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 993dbc71de..2b598e3a56 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -58,7 +58,6 @@ void RegAllocBase::init(VirtRegMap &vrm,
VRM = &vrm;
LIS = &lis;
Matrix = &mat;
- MRI->freezeReservedRegs(vrm.getMachineFunction());
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 88922169b3..3744b06f3a 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1127,7 +1127,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
- MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.clear();
UsedInInstr.setUniverse(TRI->getNumRegs());
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 02ebce7a11..f58d45f131 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -552,8 +552,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
vrm = &getAnalysis<VirtRegMap>();
spiller.reset(createInlineSpiller(*this, MF, *vrm));
- mri->freezeReservedRegs(MF);
-
DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
// Allocator main loop:
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index e47a677b77..05c48c6802 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -45,6 +45,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -63,6 +64,17 @@ EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
+// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
@@ -82,8 +94,17 @@ namespace {
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
/// WorkList - Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
/// ErasedInstrs - Set of instruction pointers that have been erased, and
/// that may be present in WorkList.
@@ -101,6 +122,9 @@ namespace {
/// LiveRangeEdit callback.
void LRE_WillEraseInstruction(MachineInstr *MI);
+ /// coalesceLocals - coalesce the LocalWorkList.
+ void coalesceLocals();
+
/// joinAllIntervals - join compatible live intervals
void joinAllIntervals();
@@ -108,9 +132,9 @@ namespace {
/// copies that cannot yet be coalesced into WorkList.
void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
- /// position From. Return true if any progress was made.
- bool copyCoalesceWorkList(unsigned From = 0);
+ /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return
+ /// true if any progress was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
@@ -154,7 +178,7 @@ namespace {
MachineInstr *CopyMI);
/// canJoinPhys - Return true if a physreg copy should be joined.
- bool canJoinPhys(CoalescerPair &CP);
+ bool canJoinPhys(const CoalescerPair &CP);
/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
@@ -217,6 +241,23 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
return true;
}
+// Return true if this block should be vacated by the coalescer to eliminate
+// branches. The important cases to handle in the coalescer are critical edges
+// split during phi elimination which contain only copies. Simple blocks that
+// contain non-branches should also be vacated, but this can be handled by an
+// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII) {
+ if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
+
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
SrcReg = DstReg = 0;
SrcIdx = DstIdx = 0;
@@ -887,7 +928,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
}
/// canJoinPhys - Return true if a copy involving a physreg should be joined.
-bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
@@ -1895,47 +1936,77 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
}
namespace {
- // DepthMBBCompare - Comparison predicate that sort first based on the loop
- // depth of the basic block (the unsigned), and then on the MBB number.
- struct DepthMBBCompare {
- typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
- bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
- // Deeper loops first
- if (LHS.first != RHS.first)
- return LHS.first > RHS.first;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
- unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
- if (cl != cr)
- return cl > cr;
-
- // As a last resort, sort by block number.
- return LHS.second->getNumber() < RHS.second->getNumber();
- }
- };
+// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
+
+// C-style comparator that sorts first based on the loop depth of the basic
+// block (the unsigned), and then on the MBB number.
+//
+// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const void *L, const void *R) {
+ const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
+ const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
}
// Try joining WorkList copies starting from index From.
// Null out any successful joins.
-bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
- assert(From <= WorkList.size() && "Out of range");
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
- for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
- if (!WorkList[i])
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(WorkList[i])) {
- WorkList[i] = 0;
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = 0;
continue;
}
bool Again = false;
- bool Success = joinCopy(WorkList[i], Again);
+ bool Success = joinCopy(CurrList[i], Again);
Progress |= Success;
if (Success || !Again)
- WorkList[i] = 0;
+ CurrList[i] = 0;
}
return Progress;
}
@@ -1947,52 +2018,74 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// Collect all copy-like instructions in MBB. Don't start coalescing anything
// yet, it might invalidate the iterator.
const unsigned PrevSize = WorkList.size();
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ++MII)
- if (MII->isCopyLike())
- WorkList.push_back(MII);
-
+ if (JoinGlobalCopies) {
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::reverse_iterator
+ MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ if (isLocalCopy(&(*MII), LIS))
+ LocalWorkList.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ }
+ else {
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+ }
// Try coalescing the collected copies immediately, and remove the nulls.
// This prevents the WorkList from getting too large since most copies are
// joinable on the first attempt.
- if (copyCoalesceWorkList(PrevSize))
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
(MachineInstr*)0), WorkList.end());
}
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
void RegisterCoalescer::joinAllIntervals() {
DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
- assert(WorkList.empty() && "Old data still around.");
-
- if (Loops->empty()) {
- // If there are no loops in the function, join intervals in function order.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();
- I != E; ++I)
- copyCoalesceInMBB(I);
- } else {
- // Otherwise, join intervals in inner loops before other intervals.
- // Unfortunately we can't just iterate over loop hierarchy here because
- // there may be more MBB's than BB's. Collect MBB's for sorting.
-
- // Join intervals in the function prolog first. We want to join physical
- // registers with virtual registers before the intervals got too long.
- std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
- MachineBasicBlock *MBB = I;
- MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
}
-
- // Sort by loop depth.
- std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
-
- // Finally, join intervals in loop nest order.
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
- copyCoalesceInMBB(MBBs[i].second);
+ copyCoalesceInMBB(MBBs[i].MBB);
}
+ coalesceLocals();
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
- while (copyCoalesceWorkList())
+ while (copyCoalesceWorkList(WorkList))
/* empty */ ;
}
@@ -2014,6 +2107,17 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = ST.enableMachineScheduler();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: " << MF->getName() << '\n');
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 5ec6564ce3..c85ccd05fa 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -27,10 +27,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
using namespace llvm;
/// setUsed - Set the register and its sub-registers as being used.
@@ -43,7 +39,7 @@ void RegScavenger::setUsed(unsigned Reg) {
bool RegScavenger::isAliasUsed(unsigned Reg) const {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (isUsed(*AI))
+ if (isUsed(*AI, *AI == Reg))
return true;
return false;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 9a65071001..0c50db8d34 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -62,10 +62,14 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
-bool SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
if (I->overlaps(D)) {
// Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
if (I->getLatency() < D.getLatency()) {
@@ -97,12 +101,22 @@ bool SUnit::addPred(const SDep &D) {
++N->NumSuccs;
}
if (!N->isScheduled) {
- assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
- ++NumPredsLeft;
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
}
if (!isScheduled) {
- assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
- ++N->NumSuccsLeft;
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
}
Preds.push_back(D);
N->Succs.push_back(P);
@@ -144,12 +158,20 @@ void SUnit::removePred(const SDep &D) {
--N->NumSuccs;
}
if (!N->isScheduled) {
- assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
- --NumPredsLeft;
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
}
if (!isScheduled) {
- assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
- --N->NumSuccsLeft;
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
}
if (P.getLatency() != 0) {
this->setDepthDirty();
@@ -292,6 +314,10 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
@@ -429,6 +455,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
Node2Index.resize(DAGSize);
// Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
SUnit *SU = &SUnits[i];
int NodeNum = SU->NodeNum;
@@ -448,11 +476,12 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
while (!WorkList.empty()) {
SUnit *SU = WorkList.back();
WorkList.pop_back();
- Allocate(SU->NodeNum, --Id);
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit *SU = I->getSUnit();
- if (!--Node2Index[SU->NodeNum])
+ if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
WorkList.push_back(SU);
@@ -513,7 +542,10 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
WorkList.pop_back();
Visited.set(SU->NodeNum);
for (int I = SU->Succs.size()-1; I >= 0; --I) {
- int s = SU->Succs[I].getSUnit()->NodeNum;
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
if (Node2Index[s] == UpperBound) {
HasLoop = true;
return;
@@ -554,15 +586,16 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
-/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
-/// create a cycle.
-bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
- if (IsReachable(TargetSU, SU))
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
return true;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
+ for (SUnit::pred_iterator
+ I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
if (I->isAssignedRegDep() &&
- IsReachable(TargetSU, I->getSUnit()))
+ IsReachable(SU, I->getSUnit()))
return true;
return false;
}
@@ -592,6 +625,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
}
ScheduleDAGTopologicalSort::
-ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index a4d4a93e6d..683011d3de 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -245,21 +246,26 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (UseSU == SU)
continue;
- SDep dep(SU, SDep::Data, *Alias);
-
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
int UseOp = UseList[i].OpIdx;
- MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
- dep.setLatency(
+ MachineInstr *RegUse = 0;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ Dep.setMinLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/true));
+ }
+ Dep.setLatency(
SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp, /*FindMin=*/false));
- dep.setMinLatency(
- SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
- RegUse, UseOp, /*FindMin=*/true));
- ST.adjustSchedDependency(SU, UseSU, dep);
- UseSU->addPred(dep);
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
}
}
}
@@ -680,8 +686,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// so that they can be given more precise dependencies. We track
// separately the known memory locations that may alias and those
// that are known not to alias
- std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
- std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
@@ -760,11 +766,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (MapVector<const Value *, SUnit *>::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
I->second->addPred(SDep(SU, SDep::Barrier));
}
- for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
SDep Dep(SU, SDep::Barrier);
@@ -798,10 +804,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
- for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
E = AliasMemDefs.end(); I != E; ++I)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
- for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
@@ -818,13 +824,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
// an existing def.
- std::map<const Value *, SUnit *>::iterator I =
+ MapVector<const Value *, SUnit *>::iterator I =
((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
+ MapVector<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
- 0, true);
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
I->second = SU;
} else {
if (MayAlias)
@@ -833,9 +838,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
NonAliasMemDefs[V] = SU;
}
// Handle the uses in MemUses, if there are any.
- std::map<const Value *, std::vector<SUnit *> >::iterator J =
+ MapVector<const Value *, std::vector<SUnit *> >::iterator J =
((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
- std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
@@ -880,9 +885,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (const Value *V =
getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I =
+ MapVector<const Value *, SUnit *>::iterator I =
((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- std::map<const Value *, SUnit *>::iterator IE =
+ MapVector<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
@@ -893,7 +898,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
} else {
// A load with no underlying object. Depend on all
// potentially aliasing stores.
- for (std::map<const Value *, SUnit *>::iterator I =
+ for (MapVector<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5416a5c1d7..37d7731aa1 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7728,7 +7728,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (StoreNodes[i].MemNode == EarliestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change its chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
removeFromWorkList(St);
DAG.DeleteNode(St);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 22f8d51ab2..d63862d638 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -221,6 +221,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c55456902c..dc8f0ee4a2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -156,7 +156,7 @@ public:
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
- Topo(SUnits) {
+ Topo(SUnits, NULL) {
const TargetMachine &tm = mf.getTarget();
if (DisableSchedCycles || !NeedLatency)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 30f03ac737..f8ca7b1d40 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
llvm_unreachable(0);
}
#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index be3168618e..649b1c4897 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3137,12 +3137,12 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (StructType *StTy = dyn_cast<StructType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
- DAG.getIntPtrConstant(Offset));
+ DAG.getConstant(Offset, N.getValueType()));
}
Ty = StTy->getElementType(Field);
@@ -3187,7 +3187,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
N.getValueType(), IdxN,
DAG.getConstant(Amt, IdxN.getValueType()));
} else {
- SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+ SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
N.getValueType(), IdxN, Scale);
}
@@ -3687,16 +3687,12 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitExp(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3715,6 +3711,7 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFracPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3728,16 +3725,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
-
- // Add the exponent into the result in integer domain.
- SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -3754,16 +3744,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
-
- // Add the exponent into the result in integer domain.
- SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -3792,37 +3775,27 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
- MVT::i32, t13);
-
- // Add the exponent into the result in integer domain.
- SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
- TwoToFracPartOfX, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
+ TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FEXP, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
}
-/// visitLog - Lower a log intrinsic. Handles the special sequences for
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
@@ -3834,6 +3807,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
+ SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3847,12 +3821,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f949a29));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
@@ -3873,12 +3844,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3fdef31a));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
@@ -3907,32 +3875,23 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4006dcab));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, LogOfMantissa);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
}
-/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog2(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
@@ -3944,6 +3903,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
+ SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -3955,12 +3915,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3fd6633d));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
@@ -3981,12 +3938,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x4020d29c));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
@@ -4016,32 +3970,23 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x4042902c));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log2ofMantissa);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG2, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
}
-/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitLog10(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
@@ -4053,6 +3998,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
+ SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4066,12 +4012,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f011300));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
@@ -4088,12 +4031,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f25f7c3));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
@@ -4118,33 +4058,23 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3f57ce70));
-
- result = DAG.getNode(ISD::FADD, dl,
- MVT::f32, LogOfExponent, Log10ofMantissa);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FLOG10, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
}
-/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-void
-SelectionDAGBuilder::visitExp2(const CallInst &I) {
- SDValue result;
- DebugLoc dl = getCurDebugLoc();
-
- if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(0));
-
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
// FractionalPartOfX = x - (float)IntegerPartOfX;
@@ -4155,6 +4085,7 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4168,15 +4099,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -4193,15 +4118,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -4229,54 +4148,42 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FEXP2, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)));
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
}
-/// visitPow - Lower a pow intrinsic. Handles the special sequences for
-/// limited-precision mode with x == 10.0f.
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f, where x is the base (LHS) and RHS
+/// is the exponent. Returns the expanded value, or a plain ISD::FPOW node
+/// when the special expansion does not apply.
-void
-SelectionDAGBuilder::visitPow(const CallInst &I) {
- SDValue result;
- const Value *Val = I.getArgOperand(0);
- DebugLoc dl = getCurDebugLoc();
+static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
-
- if (getValue(Val).getValueType() == MVT::f32 &&
- getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
+ // Both operands must be f32. The code being replaced checked argument 0 and
+ // argument 1, so the second test has to look at RHS, not at LHS again.
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
- APFloat Ten(10.0f);
- IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
- }
+ // Only a constant base of exactly 10.0f selects the special expansion.
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
}
}
- if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getArgOperand(1));
-
+ if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// IntegerPartOfX = (int32_t)(x * LOG2OF10);
- SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78));
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
@@ -4288,6 +4195,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, TLI.getPointerTy()));
+ SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
@@ -4301,15 +4209,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
@@ -4326,15 +4228,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
- } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
@@ -4362,24 +4258,18 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
- SDValue TwoToFractionalPartOfX =
- DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
-
- result = DAG.getNode(ISD::BITCAST, dl,
- MVT::f32, TwoToFractionalPartOfX);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
}
- } else {
- // No special expansion.
- result = DAG.getNode(ISD::FPOW, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)));
+
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ t13, IntegerPartOfX));
}
- setValue(&I, result);
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
}
@@ -4873,7 +4763,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
// to be zero.
// We must do this early because v2i32 is not a legal type.
- DebugLoc dl = getCurDebugLoc();
SDValue ShOps[2];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
@@ -4890,7 +4779,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vinsertf128_ps_256:
case Intrinsic::x86_avx_vinsertf128_si_256:
case Intrinsic::x86_avx2_vinserti128: {
- DebugLoc dl = getCurDebugLoc();
EVT DestVT = TLI.getValueType(I.getType());
EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
@@ -4906,7 +4794,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::x86_avx_vextractf128_ps_256:
case Intrinsic::x86_avx_vextractf128_si_256:
case Intrinsic::x86_avx2_vextracti128: {
- DebugLoc dl = getCurDebugLoc();
EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
DestVT.getVectorNumElements();
@@ -4940,7 +4827,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
EVT DestVT = TLI.getValueType(I.getType());
const Value *Op1 = I.getArgOperand(0);
- Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+ Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1),
DAG.getValueType(DestVT),
DAG.getValueType(getValue(Op1).getValueType()),
getValue(I.getArgOperand(1)),
@@ -4949,53 +4836,57 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
- case Intrinsic::sqrt:
- setValue(&I, DAG.getNode(ISD::FSQRT, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
case Intrinsic::powi:
setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return 0;
- case Intrinsic::sin:
- setValue(&I, DAG.getNode(ISD::FSIN, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
- case Intrinsic::cos:
- setValue(&I, DAG.getNode(ISD::FCOS, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
case Intrinsic::log:
- visitLog(I);
+ setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::log2:
- visitLog2(I);
+ setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::log10:
- visitLog10(I);
+ setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::exp:
- visitExp(I);
+ setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::exp2:
- visitExp2(I);
+ setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI));
return 0;
case Intrinsic::pow:
- visitPow(I);
+ setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
return 0;
+ case Intrinsic::sqrt:
case Intrinsic::fabs:
- setValue(&I, DAG.getNode(ISD::FABS, dl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
- return 0;
+ case Intrinsic::sin:
+ case Intrinsic::cos:
case Intrinsic::floor:
- setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, dl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return 0;
+ }
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5006,7 +4897,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isOperationLegal(ISD::FMA, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT) &&
TLI.isFMAFasterThanMulAndAdd(VT)){
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -5103,7 +4994,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+ Res = DAG.getStore(getRoot(), dl, Src, FIN,
MachinePointerInfo::getFixedStack(FI),
true, false, 0);
setValue(&I, Res);
@@ -5191,7 +5082,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
/*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
- Args, DAG, getCurDebugLoc());
+ Args, DAG, dl);
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return 0;
@@ -5217,7 +5108,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op2 = getValue(I.getArgOperand(1));
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2));
return 0;
}
case Intrinsic::prefetch: {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e46d9664f..5818c09f29 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -533,13 +533,6 @@ private:
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
- void visitPow(const CallInst &I);
- void visitExp2(const CallInst &I);
- void visitExp(const CallInst &I);
- void visitLog(const CallInst &I);
- void visitLog2(const CallInst &I);
- void visitLog10(const CallInst &I);
-
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
void visitVAEnd(const CallInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 20afa3def3..193f0e7d09 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -45,6 +45,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -216,8 +217,9 @@ namespace llvm {
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
+ const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>();
- if (OptLevel == CodeGenOpt::None ||
+ if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() ||
TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 49f55e2fc6..794935dad5 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -758,14 +758,13 @@ void TargetLowering::computeRegisterProperties() {
// Every integer value type larger than this largest register takes twice as
// many registers to represent as the previous ValueType.
- for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) {
- EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg;
- if (!ExpandedVT.isInteger())
- break;
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
- ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
}
// Inspect all of the ValueType's smaller than the largest integer
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 1cbee843a1..e306a2f2c2 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -720,7 +720,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// and continue.
// Sort the slots according to their size. Place unused slots at the end.
- std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
+ // Use stable sort to guarantee deterministic code generation.
+ std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+ SlotSizeSorter(MFI));
bool Chanded = true;
while (Chanded) {
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 4439192fe2..433f2ea061 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -472,7 +472,8 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
// stack slot reference to depend on the instruction that does the
// modification.
const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI))
return true;
return false;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6df4a0aa2a..0fa68c4e1a 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -104,6 +104,36 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Streamer.EmitSymbolValue(Sym, Size);
}
+/// getTTypeGlobalReference - ELF override. When Encoding has the
+/// DW_EH_PE_indirect bit set, GV is referenced through a stub symbol named
+/// after GV with ".DW.stub" appended; the stub is recorded in
+/// MachineModuleInfoELF and the returned expression refers to the stub with
+/// the indirect bit cleared. Otherwise this defers to the base
+/// TargetLoweringObjectFile implementation.
+const MCExpr *TargetLoweringObjectFileELF::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += ".DW.stub";
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ // Fill in the stub entry only if it has not been populated yet.
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
static SectionKind
getELFKindForNamedSection(StringRef Name, SectionKind K) {
// N.B.: The defaults used in here are not the same ones used in MC.
@@ -330,35 +360,6 @@ getSectionForConstant(SectionKind Kind) const {
return DataRelROSection;
}
-const MCExpr *TargetLoweringObjectFileELF::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI,
- unsigned Encoding, MCStreamer &Streamer) const {
-
- if (Encoding & dwarf::DW_EH_PE_indirect) {
- MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += ".DW.stub";
-
- // Add information about the stub reference to ELFMMI so that the stub
- // gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
- MCSymbol *Sym = Mang->getSymbol(GV);
- StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
- }
-
- return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
- }
-
- return TargetLoweringObjectFile::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
-}
-
const MCSection *
TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
@@ -620,9 +621,9 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
}
const MCExpr *TargetLoweringObjectFileMachO::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
// The mach-o version of this method defaults to returning a stub reference.
if (Encoding & DW_EH_PE_indirect) {
@@ -645,11 +646,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
}
return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
MCSymbol *TargetLoweringObjectFileMachO::
@@ -717,8 +719,19 @@ getCOFFSectionFlags(SectionKind K) {
const MCSection *TargetLoweringObjectFileCOFF::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- return getContext().getCOFFSection(GV->getSection(),
- getCOFFSectionFlags(Kind),
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ SmallString<128> Name(GV->getSection().c_str());
+ if (GV->isWeakForLinker()) {
+ Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ Name.append("$");
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+ }
+ return getContext().getCOFFSection(Name,
+ Characteristics,
+ Selection,
Kind);
}
diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp
index 691a92c392..49a44097d3 100644
--- a/lib/DebugInfo/DIContext.cpp
+++ b/lib/DebugInfo/DIContext.cpp
@@ -13,15 +13,6 @@ using namespace llvm;
DIContext::~DIContext() {}
-DIContext *DIContext::getDWARFContext(bool isLittleEndian,
- StringRef infoSection,
- StringRef abbrevSection,
- StringRef aRangeSection,
- StringRef lineSection,
- StringRef stringSection,
- StringRef rangeSection,
- const RelocAddrMap &Map) {
- return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection,
- aRangeSection, lineSection, stringSection,
- rangeSection, Map);
+DIContext *DIContext::getDWARFContext(object::ObjectFile *Obj) {
+ return new DWARFContextInMemory(Obj);
}
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index afd614cc35..5fa6534067 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -91,8 +91,10 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() {
Aranges.reset(new DWARFDebugAranges());
Aranges->extract(arangesData);
- if (Aranges->isEmpty()) // No aranges in file, generate them from the DIEs.
- Aranges->generate(this);
+ // Generate aranges from DIEs: even if .debug_aranges section is present,
+ // it may describe only a small subset of compilation units, so we need to
+ // manually build aranges for the rest of them.
+ Aranges->generate(this);
return Aranges.get();
}
@@ -298,4 +300,84 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
}
+DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
+ IsLittleEndian(true /* FIXME */) {
+ error_code ec;
+ for (object::section_iterator i = Obj->begin_sections(),
+ e = Obj->end_sections();
+ i != e; i.increment(ec)) {
+ StringRef name;
+ i->getName(name);
+ StringRef data;
+ i->getContents(data);
+
+ if (name.startswith("__DWARF,"))
+ name = name.substr(8); // Skip "__DWARF," prefix.
+ name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
+ if (name == "debug_info")
+ InfoSection = data;
+ else if (name == "debug_abbrev")
+ AbbrevSection = data;
+ else if (name == "debug_line")
+ LineSection = data;
+ else if (name == "debug_aranges")
+ ARangeSection = data;
+ else if (name == "debug_str")
+ StringSection = data;
+ else if (name == "debug_ranges")
+ RangeSection = data;
+ // Any more debug info sections go here.
+ else
+ continue;
+
+ // TODO: For now only handle relocations for the debug_info section.
+ if (name != "debug_info")
+ continue;
+
+ if (i->begin_relocations() != i->end_relocations()) {
+ uint64_t SectionSize;
+ i->getSize(SectionSize);
+ for (object::relocation_iterator reloc_i = i->begin_relocations(),
+ reloc_e = i->end_relocations();
+ reloc_i != reloc_e; reloc_i.increment(ec)) {
+ uint64_t Address;
+ reloc_i->getAddress(Address);
+ uint64_t Type;
+ reloc_i->getType(Type);
+
+ object::RelocVisitor V(Obj->getFileFormatName());
+ // The section address is always 0 for debug sections.
+ object::RelocToApply R(V.visit(Type, *reloc_i));
+ if (V.error()) {
+ SmallString<32> Name;
+ error_code ec(reloc_i->getTypeName(Name));
+ if (ec) {
+ errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
+ }
+ errs() << "error: failed to compute relocation: "
+ << Name << "\n";
+ continue;
+ }
+
+ if (Address + R.Width > SectionSize) {
+ errs() << "error: " << R.Width << "-byte relocation starting "
+ << Address << " bytes into section " << name << " which is "
+ << SectionSize << " bytes long.\n";
+ continue;
+ }
+ if (R.Width > 8) {
+ errs() << "error: can't handle a relocation of more than 8 bytes at "
+ "a time.\n";
+ continue;
+ }
+ DEBUG(dbgs() << "Writing " << format("%p", R.Value)
+ << " at " << format("%p", Address)
+ << " with width " << format("%d", R.Width)
+ << "\n");
+ RelocMap[Address] = std::make_pair(R.Width, R.Value);
+ }
+ }
+ }
+}
+
void DWARFContextInMemory::anchor() { }
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 4001792b3d..ff161e2aad 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -25,9 +25,6 @@ namespace llvm {
/// information parsing. The actual data is supplied through pure virtual
/// methods that a concrete implementation provides.
class DWARFContext : public DIContext {
- bool IsLittleEndian;
- const RelocAddrMap &RelocMap;
-
SmallVector<DWARFCompileUnit, 1> CUs;
OwningPtr<DWARFDebugAbbrev> Abbrev;
OwningPtr<DWARFDebugAranges> Aranges;
@@ -38,10 +35,9 @@ class DWARFContext : public DIContext {
/// Read compile units from the debug_info section and store them in CUs.
void parseCompileUnits();
-protected:
- DWARFContext(bool isLittleEndian, const RelocAddrMap &Map) :
- IsLittleEndian(isLittleEndian), RelocMap(Map) {}
+
public:
+ DWARFContext() {}
virtual void dump(raw_ostream &OS);
/// Get the number of compile units in this context.
@@ -72,9 +68,8 @@ public:
virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier());
- bool isLittleEndian() const { return IsLittleEndian; }
- const RelocAddrMap &relocMap() const { return RelocMap; }
-
+ virtual bool isLittleEndian() const = 0;
+ virtual const RelocAddrMap &relocMap() const = 0;
virtual StringRef getInfoSection() = 0;
virtual StringRef getAbbrevSection() = 0;
virtual StringRef getARangeSection() = 0;
@@ -99,6 +94,8 @@ private:
/// pointers to it.
class DWARFContextInMemory : public DWARFContext {
virtual void anchor();
+ bool IsLittleEndian;
+ RelocAddrMap RelocMap;
StringRef InfoSection;
StringRef AbbrevSection;
StringRef ARangeSection;
@@ -106,23 +103,9 @@ class DWARFContextInMemory : public DWARFContext {
StringRef StringSection;
StringRef RangeSection;
public:
- DWARFContextInMemory(bool isLittleEndian,
- StringRef infoSection,
- StringRef abbrevSection,
- StringRef aRangeSection,
- StringRef lineSection,
- StringRef stringSection,
- StringRef rangeSection,
- const RelocAddrMap &Map = RelocAddrMap())
- : DWARFContext(isLittleEndian, Map),
- InfoSection(infoSection),
- AbbrevSection(abbrevSection),
- ARangeSection(aRangeSection),
- LineSection(lineSection),
- StringSection(stringSection),
- RangeSection(rangeSection)
- {}
-
+ DWARFContextInMemory(object::ObjectFile *);
+ virtual bool isLittleEndian() const { return IsLittleEndian; }
+ virtual const RelocAddrMap &relocMap() const { return RelocMap; }
virtual StringRef getInfoSection() { return InfoSection; }
virtual StringRef getAbbrevSection() { return AbbrevSection; }
virtual StringRef getARangeSection() { return ARangeSection; }
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index f9a34c908f..b077eb5e38 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -26,34 +26,40 @@ namespace {
class CountArangeDescriptors {
public:
CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {}
- void operator()(const DWARFDebugArangeSet &set) {
- Count += set.getNumDescriptors();
+ void operator()(const DWARFDebugArangeSet &Set) {
+ Count += Set.getNumDescriptors();
}
uint32_t &Count;
};
class AddArangeDescriptors {
public:
- AddArangeDescriptors(DWARFDebugAranges::RangeColl &ranges)
- : RangeCollection(ranges) {}
- void operator()(const DWARFDebugArangeSet& set) {
- const DWARFDebugArangeSet::Descriptor* arange_desc_ptr;
- DWARFDebugAranges::Range range;
- range.Offset = set.getCompileUnitDIEOffset();
-
- for (uint32_t i=0; (arange_desc_ptr = set.getDescriptor(i)) != NULL; ++i){
- range.LoPC = arange_desc_ptr->Address;
- range.Length = arange_desc_ptr->Length;
+ AddArangeDescriptors(DWARFDebugAranges::RangeColl &Ranges,
+ DWARFDebugAranges::ParsedCUOffsetColl &CUOffsets)
+ : RangeCollection(Ranges),
+ CUOffsetCollection(CUOffsets) {}
+ void operator()(const DWARFDebugArangeSet &Set) {
+ DWARFDebugAranges::Range Range;
+ Range.Offset = Set.getCompileUnitDIEOffset();
+ CUOffsetCollection.insert(Range.Offset);
+
+ for (uint32_t i = 0, n = Set.getNumDescriptors(); i < n; ++i) {
+ const DWARFDebugArangeSet::Descriptor *ArangeDescPtr =
+ Set.getDescriptor(i);
+ Range.LoPC = ArangeDescPtr->Address;
+ Range.Length = ArangeDescPtr->Length;
// Insert each item in increasing address order so binary searching
// can later be done!
- DWARFDebugAranges::RangeColl::iterator insert_pos =
+ DWARFDebugAranges::RangeColl::iterator InsertPos =
std::lower_bound(RangeCollection.begin(), RangeCollection.end(),
- range, RangeLessThan);
- RangeCollection.insert(insert_pos, range);
+ Range, RangeLessThan);
+ RangeCollection.insert(InsertPos, Range);
}
+
}
- DWARFDebugAranges::RangeColl& RangeCollection;
+ DWARFDebugAranges::RangeColl &RangeCollection;
+ DWARFDebugAranges::ParsedCUOffsetColl &CUOffsetCollection;
};
}
@@ -75,7 +81,7 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
if (count > 0) {
Aranges.reserve(count);
- AddArangeDescriptors range_adder(Aranges);
+ AddArangeDescriptors range_adder(Aranges, ParsedCUOffsets);
std::for_each(sets.begin(), sets.end(), range_adder);
}
}
@@ -83,13 +89,14 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) {
}
bool DWARFDebugAranges::generate(DWARFContext *ctx) {
- clear();
if (ctx) {
const uint32_t num_compile_units = ctx->getNumCompileUnits();
for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) {
- DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx);
- if (cu)
- cu->buildAddressRangeTable(this, true);
+ if (DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx)) {
+ uint32_t CUOffset = cu->getOffset();
+ if (ParsedCUOffsets.insert(CUOffset).second)
+ cu->buildAddressRangeTable(this, true);
+ }
}
}
sort(true, /* overlap size */ 0);
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
index 12afb60beb..1509ffad41 100644
--- a/lib/DebugInfo/DWARFDebugAranges.h
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
#include "DWARFDebugArangeSet.h"
+#include "llvm/ADT/DenseSet.h"
#include <list>
namespace llvm {
@@ -60,7 +61,10 @@ public:
uint32_t Offset; // Offset of the compile unit or die
};
- void clear() { Aranges.clear(); }
+ void clear() {
+ Aranges.clear();
+ ParsedCUOffsets.clear();
+ }
bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const;
bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const;
bool extract(DataExtractor debug_aranges_data);
@@ -88,9 +92,11 @@ public:
typedef std::vector<Range> RangeColl;
typedef RangeColl::const_iterator RangeCollIterator;
+ typedef DenseSet<uint32_t> ParsedCUOffsetColl;
private:
RangeColl Aranges;
+ ParsedCUOffsetColl ParsedCUOffsets;
};
}
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index fea9fd7f7d..1d8ea01110 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -46,6 +46,8 @@ static const uint8_t form_sizes_addr4[] = {
0, // 0x18 DW_FORM_exprloc
0, // 0x19 DW_FORM_flag_present
8, // 0x20 DW_FORM_ref_sig8
+ 4, // 0x1f01 DW_FORM_GNU_addr_index
+ 4, // 0x1f02 DW_FORM_GNU_str_index
};
static const uint8_t form_sizes_addr8[] = {
@@ -76,6 +78,8 @@ static const uint8_t form_sizes_addr8[] = {
0, // 0x18 DW_FORM_exprloc
0, // 0x19 DW_FORM_flag_present
8, // 0x20 DW_FORM_ref_sig8
+ 8, // 0x1f01 DW_FORM_GNU_addr_index
+ 8, // 0x1f02 DW_FORM_GNU_str_index
};
const uint8_t *
@@ -108,8 +112,8 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
*offset_ptr += R.first;
} else
Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
- }
break;
+ }
case DW_FORM_exprloc:
case DW_FORM_block:
Value.uval = data.getULEB128(offset_ptr);
@@ -185,6 +189,12 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
case DW_FORM_ref_sig8:
Value.uval = data.getU64(offset_ptr);
break;
+ case DW_FORM_GNU_addr_index:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
+ case DW_FORM_GNU_str_index:
+ Value.uval = data.getULEB128(offset_ptr);
+ break;
default:
return false;
}
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 4cb0270d57..31cee16039 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Errno.h"
@@ -41,6 +42,11 @@ class IntelJITEventListener : public JITEventListener {
MethodIDMap MethodIDs;
FilenameCache Filenames;
+ typedef SmallVector<const void *, 64> MethodAddressVector;
+ typedef DenseMap<const void *, MethodAddressVector> ObjectMap;
+
+ ObjectMap LoadedObjectMap;
+
public:
IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) {
Wrapper.reset(libraryWrapper);
@@ -169,9 +175,78 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
}
void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
+ // Get the address of the object image for use as a unique identifier
+ const void* ObjData = Obj.getData().data();
+ MethodAddressVector Functions;
+
+ // Use symbol info to iterate functions in the object.
+ error_code ec;
+ for (object::symbol_iterator I = Obj.begin_symbols(),
+ E = Obj.end_symbols();
+ I != E && !ec;
+ I.increment(ec)) {
+ object::SymbolRef::Type SymType;
+ if (I->getType(SymType)) continue;
+ if (SymType == object::SymbolRef::ST_Function) {
+ StringRef Name;
+ uint64_t Addr;
+ uint64_t Size;
+ if (I->getName(Name)) continue;
+ if (I->getAddress(Addr)) continue;
+ if (I->getSize(Size)) continue;
+
+ // Record this address in a local vector
+ Functions.push_back((void*)Addr);
+
+ // Build the function loaded notification message
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper,
+ Name.data(),
+ Addr,
+ Size);
+
+ // FIXME: Try to find line info for this function in the DWARF sections.
+ FunctionMessage.source_file_name = 0;
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[(void*)Addr] = FunctionMessage.method_id;
+ }
+ }
+
+ // To support object unload notification, we need to keep a list of
+ // registered function addresses for each loaded object. We will
+ // use the MethodIDs map to get the registered ID for each function.
+ LoadedObjectMap[ObjData] = Functions;
}
void IntelJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) {
+ // Get the address of the object image for use as a unique identifier
+ const void* ObjData = Obj.getData().data();
+
+ // Get the object's function list from LoadedObjectMap
+ ObjectMap::iterator OI = LoadedObjectMap.find(ObjData);
+ if (OI == LoadedObjectMap.end())
+ return;
+ MethodAddressVector& Functions = OI->second;
+
+ // Walk the function list, unregistering each function
+ for (MethodAddressVector::iterator FI = Functions.begin(),
+ FE = Functions.end();
+ FI != FE;
+ ++FI) {
+ void* FnStart = const_cast<void*>(*FI);
+ MethodIDMap::iterator MI = MethodIDs.find(FnStart);
+ if (MI != MethodIDs.end()) {
+ Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START,
+ &MI->second);
+ MethodIDs.erase(MI);
+ }
+ }
+
+ // Erase the object from LoadedObjectMap
+ LoadedObjectMap.erase(OI);
}
} // anonymous namespace.
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index 7ab08e15a8..3d9ff53516 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -35,8 +35,6 @@ class IntelJITEventsWrapper {
NotifyEventPtr NotifyEventFunc;
RegisterCallbackExPtr RegisterCallbackExFunc;
IsProfilingActivePtr IsProfilingActiveFunc;
- FinalizeThreadPtr FinalizeThreadFunc;
- FinalizeProcessPtr FinalizeProcessFunc;
GetNewMethodIDPtr GetNewMethodIDFunc;
public:
@@ -48,8 +46,6 @@ public:
: NotifyEventFunc(::iJIT_NotifyEvent),
RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
IsProfilingActiveFunc(::iJIT_IsProfilingActive),
- FinalizeThreadFunc(::FinalizeThread),
- FinalizeProcessFunc(::FinalizeProcess),
GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
}
@@ -62,8 +58,6 @@ public:
: NotifyEventFunc(NotifyEventImpl),
RegisterCallbackExFunc(RegisterCallbackExImpl),
IsProfilingActiveFunc(IsProfilingActiveImpl),
- FinalizeThreadFunc(FinalizeThreadImpl),
- FinalizeProcessFunc(FinalizeProcessImpl),
GetNewMethodIDFunc(GetNewMethodIDImpl) {
}
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 61bc119d30..bd0519e9c4 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -501,10 +501,14 @@ namespace {
/// allocateDataSection - Allocate memory for a data section.
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID, bool IsReadOnly) {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
+ bool applyPermissions(std::string *ErrMsg) {
+ return false;
+ }
+
/// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
diff --git a/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
index d44fee2292..d1b5ee704a 100644
--- a/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
@@ -227,7 +227,8 @@ uint8_t *NaClJITMemoryManager::allocateCodeSection(uintptr_t Size,
uint8_t *NaClJITMemoryManager::allocateDataSection(uintptr_t Size,
unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID,
+ bool IsReadOnly) {
return (uint8_t *)DataAllocator.Allocate(Size, Alignment);
}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 752c5b73ea..d72e56378b 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -118,17 +118,26 @@ void MCJIT::emitObject(Module *m) {
// FIXME: Add a parameter to identify which object is being finalized when
// MCJIT supports multiple modules.
+// FIXME: Provide a way to separate code emission, relocations and page
+// protection in the interface.
void MCJIT::finalizeObject() {
// If the module hasn't been compiled, just do that.
if (!isCompiled) {
// If the call to Dyld.resolveRelocations() is removed from emitObject()
// we'll need to do that here.
emitObject(M);
+
+ // Set page permissions.
+ MemMgr->applyPermissions();
+
return;
}
// Resolve any relocations.
Dyld.resolveRelocations();
+
+ // Set page permissions.
+ MemMgr->applyPermissions();
}
void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 17f3a21464..097df35a5c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -1,76 +1,76 @@
-//===-- ObjectImageCommon.h - Format independent executuable object image -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares a file format independent ObjectImage class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
-#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
-
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
-#include "llvm/ExecutionEngine/ObjectBuffer.h"
-
-namespace llvm {
-
-class ObjectImageCommon : public ObjectImage {
- ObjectImageCommon(); // = delete
- ObjectImageCommon(const ObjectImageCommon &other); // = delete
-
-protected:
- object::ObjectFile *ObjFile;
-
- // This form of the constructor allows subclasses to use
- // format-specific subclasses of ObjectFile directly
- ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
- : ObjectImage(Input), // saves Input as Buffer and takes ownership
- ObjFile(Obj)
- {
- }
-
-public:
- ObjectImageCommon(ObjectBuffer* Input)
- : ObjectImage(Input) // saves Input as Buffer and takes ownership
- {
- ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer());
- }
- virtual ~ObjectImageCommon() { delete ObjFile; }
-
- virtual object::symbol_iterator begin_symbols() const
- { return ObjFile->begin_symbols(); }
- virtual object::symbol_iterator end_symbols() const
- { return ObjFile->end_symbols(); }
-
- virtual object::section_iterator begin_sections() const
- { return ObjFile->begin_sections(); }
- virtual object::section_iterator end_sections() const
- { return ObjFile->end_sections(); }
-
- virtual /* Triple::ArchType */ unsigned getArch() const
- { return ObjFile->getArch(); }
-
- virtual StringRef getData() const { return ObjFile->getData(); }
-
- // Subclasses can override these methods to update the image with loaded
- // addresses for sections and common symbols
- virtual void updateSectionAddress(const object::SectionRef &Sec,
- uint64_t Addr) {}
- virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
- {}
-
- // Subclasses can override these methods to provide JIT debugging support
- virtual void registerWithDebugger() {}
- virtual void deregisterWithDebugger() {}
-};
-
-} // end namespace llvm
-
-#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
-
+//===-- ObjectImageCommon.h - Format independent executable object image --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+
+namespace llvm {
+
+class ObjectImageCommon : public ObjectImage {
+ ObjectImageCommon(); // = delete
+ ObjectImageCommon(const ObjectImageCommon &other); // = delete
+
+protected:
+ object::ObjectFile *ObjFile;
+
+ // This form of the constructor allows subclasses to use
+ // format-specific subclasses of ObjectFile directly
+ ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
+ : ObjectImage(Input), // saves Input as Buffer and takes ownership
+ ObjFile(Obj)
+ {
+ }
+
+public:
+ ObjectImageCommon(ObjectBuffer* Input)
+ : ObjectImage(Input) // saves Input as Buffer and takes ownership
+ {
+ ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer());
+ }
+ virtual ~ObjectImageCommon() { delete ObjFile; }
+
+ virtual object::symbol_iterator begin_symbols() const
+ { return ObjFile->begin_symbols(); }
+ virtual object::symbol_iterator end_symbols() const
+ { return ObjFile->end_symbols(); }
+
+ virtual object::section_iterator begin_sections() const
+ { return ObjFile->begin_sections(); }
+ virtual object::section_iterator end_sections() const
+ { return ObjFile->end_sections(); }
+
+ virtual /* Triple::ArchType */ unsigned getArch() const
+ { return ObjFile->getArch(); }
+
+ virtual StringRef getData() const { return ObjFile->getData(); }
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const object::SectionRef &Sec,
+ uint64_t Addr) {}
+ virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
+ {}
+
+ // Subclasses can override these methods to provide JIT debugging support
+ virtual void registerWithDebugger() {}
+ virtual void deregisterWithDebugger() {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
+
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index f6dccb106d..e6e1bdc8b1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -125,9 +125,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
<< " flags: " << flags
<< " SID: " << SectionID
<< " Offset: " << format("%p", SectOffset));
- bool isGlobal = flags & SymbolRef::SF_Global;
- if (isGlobal)
- GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
}
}
DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
@@ -182,7 +180,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
// Allocate memory for the section
unsigned SectionID = Sections.size();
uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
- SectionID);
+ SectionID, false);
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
@@ -237,11 +235,13 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
bool IsRequired;
bool IsVirtual;
bool IsZeroInit;
+ bool IsReadOnly;
uint64_t DataSize;
StringRef Name;
Check(Section.isRequiredForExecution(IsRequired));
Check(Section.isVirtual(IsVirtual));
Check(Section.isZeroInit(IsZeroInit));
+ Check(Section.isReadOnlyData(IsReadOnly));
Check(Section.getSize(DataSize));
Check(Section.getName(Name));
@@ -256,7 +256,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
Allocate = DataSize + StubBufSize;
Addr = IsCode
? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
- : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, IsReadOnly);
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
@@ -451,6 +451,12 @@ void RuntimeDyldImpl::resolveExternalSymbols() {
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+ // FIXME: There's a potential issue lurking here if a single instance of
+ // RuntimeDyld is used to load multiple objects. The current implementation
+ // associates a single memory manager with a RuntimeDyld instance. Even
+ // though the public class spawns a new 'impl' instance for each load,
+ // they share a single memory manager. This can become a problem when page
+ // permissions are applied.
Dyld = 0;
MM = mm;
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 1ebcaf7ba8..74bb46dfcd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -290,6 +290,7 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
// Write a 32bit value to relocation address, taking into account the
// implicit addend encoded in the target.
+ case ELF::R_ARM_TARGET1 :
case ELF::R_ARM_ABS32 :
*TargetPtr += Value;
break;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 16d1fff8a6..3309bb3e64 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -233,6 +233,8 @@ public:
virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
virtual void EmitCFISignalFrame();
+ virtual void EmitCFIUndefined(int64_t Register);
+ virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -1064,6 +1066,26 @@ void MCAsmStreamer::EmitCFISignalFrame() {
EmitEOL();
}
+void MCAsmStreamer::EmitCFIUndefined(int64_t Register) {
+ MCStreamer::EmitCFIUndefined(Register);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_undefined " << Register;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
+ MCStreamer::EmitCFIRegister(Register1, Register2);
+
+ if (!UseCFI)
+ return;
+
+ OS << "\t.cfi_register " << Register1 << ", " << Register2;
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
MCStreamer::EmitWin64EHStartProc(Symbol);
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 5189c9daee..490ca75bc8 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -38,18 +37,6 @@ using namespace llvm;
LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
- // Initialize targets and assembly printers/parsers.
- // FIXME: Clients are responsible for initializing the targets. And this
- // would be done by calling routines in "llvm-c/Target.h" which are static
- // line functions. But the current use of LLVMCreateDisasm() is to dynamically
- // load libLTO with dlopen() and then lookup the symbols using dlsym().
- // And since these initialize routines are static that does not work which
- // is why the call to them in this 'C' library API was added back.
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
- llvm::InitializeAllDisassemblers();
-
// Get the target.
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index a1643b2da5..084fadec72 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -484,7 +484,8 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
// .debug_aranges section. Which contains a header and a table of pairs of
// PointerSize'ed values for the address and size of section(s) with line table
// entries (just the default .text in our case) and a terminating pair of zeros.
-static void EmitGenDwarfAranges(MCStreamer *MCOS) {
+static void EmitGenDwarfAranges(MCStreamer *MCOS,
+ const MCSymbol *InfoSectionSymbol) {
MCContext &context = MCOS->getContext();
// Create a symbol at the end of the section that we are creating the dwarf
@@ -523,8 +524,11 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS) {
// The 2 byte version, which is 2.
MCOS->EmitIntValue(2, 2);
// The 4 byte offset to the compile unit in the .debug_info from the start
- // of the .debug_info, it is at the start of that section so this is zero.
- MCOS->EmitIntValue(0, 4);
+ // of the .debug_info.
+ if (InfoSectionSymbol)
+ MCOS->EmitSymbolValue(InfoSectionSymbol, 4);
+ else
+ MCOS->EmitIntValue(0, 4);
// The 1 byte size of an address.
MCOS->EmitIntValue(AddrSize, 1);
// The 1 byte size of a segment descriptor, we use a value of zero.
@@ -705,15 +709,21 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
// Create the dwarf sections in this order (.debug_line already created).
MCContext &context = MCOS->getContext();
const MCAsmInfo &AsmInfo = context.getAsmInfo();
+ bool CreateDwarfSectionSymbols =
+ AsmInfo.doesDwarfUseRelocationsAcrossSections();
+ if (!CreateDwarfSectionSymbols)
+ LineSectionSymbol = NULL;
+ MCSymbol *AbbrevSectionSymbol = NULL;
+ MCSymbol *InfoSectionSymbol = NULL;
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ if (CreateDwarfSectionSymbols) {
+ InfoSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoSectionSymbol);
+ }
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
- MCSymbol *AbbrevSectionSymbol;
- if (AsmInfo.doesDwarfUseRelocationsAcrossSections()) {
+ if (CreateDwarfSectionSymbols) {
AbbrevSectionSymbol = context.CreateTempSymbol();
MCOS->EmitLabel(AbbrevSectionSymbol);
- } else {
- AbbrevSectionSymbol = NULL;
- LineSectionSymbol = NULL;
}
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
@@ -722,7 +732,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
return;
// Output the data for .debug_aranges section.
- EmitGenDwarfAranges(MCOS);
+ EmitGenDwarfAranges(MCOS, InfoSectionSymbol);
// Output the data for .debug_abbrev section.
EmitGenDwarfAbbrev(MCOS);
@@ -928,46 +938,86 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
bool VerboseAsm = Streamer.isVerboseAsm();
switch (Instr.getOperation()) {
- case MCCFIInstruction::Move:
- case MCCFIInstruction::RelMove: {
- const MachineLocation &Dst = Instr.getDestination();
- const MachineLocation &Src = Instr.getSource();
- const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove;
-
- // If advancing cfa.
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
- if (Src.getReg() == MachineLocation::VirtualFP) {
- if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset");
- Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
- } else {
- if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa");
- Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") +
- Twine(Src.getReg()));
- Streamer.EmitULEB128IntValue(Src.getReg());
- }
+ case MCCFIInstruction::OpRegister: {
+ unsigned Reg1 = Instr.getRegister();
+ unsigned Reg2 = Instr.getRegister2();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_register");
+ Streamer.AddComment(Twine("Reg1 ") + Twine(Reg1));
+ Streamer.AddComment(Twine("Reg2 ") + Twine(Reg2));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_register, 1);
+ Streamer.EmitULEB128IntValue(Reg1);
+ Streamer.EmitULEB128IntValue(Reg2);
+ return;
+ }
+ case MCCFIInstruction::OpUndefined: {
+ unsigned Reg = Instr.getRegister();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_undefined");
+ Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
+ Streamer.EmitULEB128IntValue(Reg);
+ return;
+ }
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ case MCCFIInstruction::OpDefCfaOffset: {
+ const bool IsRelative =
+ Instr.getOperation() == MCCFIInstruction::OpAdjustCfaOffset;
- if (IsRelative)
- CFAOffset += Src.getOffset();
- else
- CFAOffset = -Src.getOffset();
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa_offset");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
- Streamer.EmitULEB128IntValue(CFAOffset);
- return;
- }
+ if (IsRelative)
+ CFAOffset += Instr.getOffset();
+ else
+ CFAOffset = -Instr.getOffset();
- if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
- assert(Dst.isReg() && "Machine move not supported yet.");
- if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register");
- Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
- if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg()));
- Streamer.EmitULEB128IntValue(Dst.getReg());
- return;
- }
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+
+ return;
+ }
+ case MCCFIInstruction::OpDefCfa: {
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
+ Streamer.EmitULEB128IntValue(Instr.getRegister());
+
+ CFAOffset = -Instr.getOffset();
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Offset " + Twine(CFAOffset)));
+ Streamer.EmitULEB128IntValue(CFAOffset);
+
+ return;
+ }
+
+ case MCCFIInstruction::OpDefCfaRegister: {
+ if (VerboseAsm)
+ Streamer.AddComment("DW_CFA_def_cfa_register");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+
+ if (VerboseAsm)
+ Streamer.AddComment(Twine("Reg ") + Twine(Instr.getRegister()));
+ Streamer.EmitULEB128IntValue(Instr.getRegister());
+
+ return;
+ }
+
+ case MCCFIInstruction::OpOffset:
+ case MCCFIInstruction::OpRelOffset: {
+ const bool IsRelative =
+ Instr.getOperation() == MCCFIInstruction::OpRelOffset;
- unsigned Reg = Src.getReg();
- int Offset = Dst.getOffset();
+ unsigned Reg = Instr.getRegister();
+ int Offset = Instr.getOffset();
if (IsRelative)
Offset -= CFAOffset;
Offset = Offset / dataAlignmentFactor;
@@ -995,24 +1045,24 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
}
return;
}
- case MCCFIInstruction::RememberState:
+ case MCCFIInstruction::OpRememberState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
- case MCCFIInstruction::RestoreState:
+ case MCCFIInstruction::OpRestoreState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
- case MCCFIInstruction::SameValue: {
- unsigned Reg = Instr.getDestination().getReg();
+ case MCCFIInstruction::OpSameValue: {
+ unsigned Reg = Instr.getRegister();
if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
return;
}
- case MCCFIInstruction::Restore: {
- unsigned Reg = Instr.getDestination().getReg();
+ case MCCFIInstruction::OpRestore: {
+ unsigned Reg = Instr.getRegister();
if (VerboseAsm) {
Streamer.AddComment("DW_CFA_restore");
Streamer.AddComment(Twine("Reg ") + Twine(Reg));
@@ -1020,7 +1070,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
- case MCCFIInstruction::Escape:
+ case MCCFIInstruction::OpEscape:
if (VerboseAsm) Streamer.AddComment("Escape bytes");
Streamer.EmitBytes(Instr.getValues(), 0);
return;
@@ -1244,8 +1294,21 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
TranslateMachineLocation(MRI, Moves[i].getDestination());
const MachineLocation &Src =
TranslateMachineLocation(MRI, Moves[i].getSource());
- MCCFIInstruction Inst(Label, Dst, Src);
- Instructions.push_back(Inst);
+
+ if (Dst.isReg()) {
+ assert(Dst.getReg() == MachineLocation::VirtualFP);
+ assert(!Src.isReg());
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset());
+ Instructions.push_back(Inst);
+ } else {
+ assert(Src.isReg());
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createOffset(Label, Reg, Offset);
+ Instructions.push_back(Inst);
+ }
}
EmitCFIInstructions(streamer, Instructions, NULL);
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index e0336342d6..de2f375aab 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -229,6 +229,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Mips_GOT_OFST: return "GOT_OFST";
case VK_Mips_HIGHER: return "HIGHER";
case VK_Mips_HIGHEST: return "HIGHEST";
+ case VK_Mips_GOT_HI16: return "GOT_HI16";
+ case VK_Mips_GOT_LO16: return "GOT_LO16";
+ case VK_Mips_CALL_HI16: return "CALL_HI16";
+ case VK_Mips_CALL_LO16: return "CALL_LO16";
}
llvm_unreachable("Invalid variant kind");
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index cf86a44d1b..ab554189ee 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -403,6 +403,10 @@ public:
&GenericAsmParser::ParseDirectiveCFIEscape>(".cfi_escape");
AddDirectiveHandler<
&GenericAsmParser::ParseDirectiveCFISignalFrame>(".cfi_signal_frame");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIUndefined>(".cfi_undefined");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRegister>(".cfi_register");
// Macro directives.
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
@@ -440,6 +444,8 @@ public:
bool ParseDirectiveCFIRestore(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveCFIEscape(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveCFISignalFrame(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIUndefined(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRegister(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
@@ -3308,6 +3314,43 @@ bool GenericAsmParser::ParseDirectiveCFISignalFrame(StringRef Directive,
return false;
}
+/// ParseDirectiveCFIUndefined
+/// ::= .cfi_undefined register
+bool GenericAsmParser::ParseDirectiveCFIUndefined(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIUndefined(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIRegister
+/// ::= .cfi_register register, register
+bool GenericAsmParser::ParseDirectiveCFIRegister(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ int64_t Register1 = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Register2 = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIRegister(Register1, Register2);
+
+ return false;
+}
+
/// ParseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index afece0ba55..cdfd5246f3 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -234,69 +234,58 @@ void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) {
EmitLabel(Frame.End);
}
-void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+MCSymbol *MCStreamer::EmitCFICommon() {
EnsureValidFrame();
- MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
- MachineLocation Dest(MachineLocation::VirtualFP);
- MachineLocation Source(Register, -Offset);
- MCCFIInstruction Instruction(Label, Dest, Source);
+ return Label;
+}
+
+void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfa(Label, Register, Offset);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfaOffset(Label, Offset);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(MachineLocation::VirtualFP);
- MachineLocation Source(MachineLocation::VirtualFP, -Offset);
- MCCFIInstruction Instruction(Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(MachineLocation::VirtualFP);
- MachineLocation Source(MachineLocation::VirtualFP, Adjustment);
- MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createDefCfaRegister(Label, Register);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(Register);
- MachineLocation Source(MachineLocation::VirtualFP);
- MCCFIInstruction Instruction(Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createOffset(Label, Register, Offset);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(Register, Offset);
- MachineLocation Source(Register, Offset);
- MCCFIInstruction Instruction(Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRelOffset(Label, Register, Offset);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MachineLocation Dest(Register, Offset);
- MachineLocation Source(Register, Offset);
- MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source);
CurFrame->Instructions.push_back(Instruction);
}
@@ -316,48 +305,40 @@ void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
}
void MCStreamer::EmitCFIRememberState() {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createRememberState(Label);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::RememberState, Label);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIRestoreState() {
// FIXME: Error if there is no matching cfi_remember_state.
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createRestoreState(Label);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::RestoreState, Label);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFISameValue(int64_t Register) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createSameValue(Label, Register);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIRestore(int64_t Register) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRestore(Label, Register);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label, Register);
CurFrame->Instructions.push_back(Instruction);
}
void MCStreamer::EmitCFIEscape(StringRef Values) {
- EnsureValidFrame();
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction = MCCFIInstruction::createEscape(Label, Values);
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- MCSymbol *Label = getContext().CreateTempSymbol();
- EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Escape, Label, Values);
CurFrame->Instructions.push_back(Instruction);
}
@@ -367,6 +348,22 @@ void MCStreamer::EmitCFISignalFrame() {
CurFrame->IsSignalFrame = true;
}
+void MCStreamer::EmitCFIUndefined(int64_t Register) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createUndefined(Label, Register);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
+ MCSymbol *Label = EmitCFICommon();
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRegister(Label, Register1, Register2);
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Instructions.push_back(Instruction);
+}
+
void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
W64UnwindInfos.push_back(Frame);
CurrentW64UnwindInfo = W64UnwindInfos.back();
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 2a5951ada5..5b1e007e85 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -13,6 +13,7 @@
#include "llvm/Object/Archive.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -122,7 +123,14 @@ error_code Archive::Child::getName(StringRef &Result) const {
+ sizeof(ArchiveMemberHeader)
+ Parent->StringTable->getSize()))
return object_error::parse_failed;
- Result = addr;
+
+ // GNU long file names end with a /.
+ if (Parent->kind() == K_GNU) {
+ StringRef::size_type End = StringRef(addr).find('/');
+ Result = StringRef(addr, End);
+ } else {
+ Result = addr;
+ }
return object_error::success;
} else if (name.startswith("#1/")) {
APInt name_size;
@@ -187,15 +195,52 @@ Archive::Archive(MemoryBuffer *source, error_code &ec)
child_iterator i = begin_children(false);
child_iterator e = end_children();
- if (i != e) ++i; // Nobody cares about the first member.
- if (i != e) {
- SymbolTable = i;
- ++i;
- }
- if (i != e) {
- StringTable = i;
- }
+ StringRef name;
+ if ((ec = i->getName(name)))
+ return;
+ // Below is the pattern that is used to figure out the archive format
+ // GNU archive format
+ // First member : / (points to the symbol table )
+ // Second member : // (may exist, if it exists, points to the string table)
+ // Note : The string table is used if the filename exceeds 15 characters
+ // BSD archive format
+ // First member : __.SYMDEF (points to the symbol table)
+ // There is no string table, if the filename exceeds 15 characters or has a
+ // embedded space, the filename has #1/<size>, The size represents the size
+ // of the filename that needs to be read after the archive header
+ // COFF archive format
+ // First member : /
+ // Second member : / (provides a directory of symbols)
+ // Third member : // contains the string table, this is present even if the
+ // string table is empty
+ if (name == "/") {
+ SymbolTable = i;
+ StringTable = e;
+ if (i != e) ++i;
+ if ((ec = i->getName(name)))
+ return;
+ if (name[0] != '/') {
+ Format = K_GNU;
+ } else if ((name.size() > 1) && (name == "//")) {
+ Format = K_GNU;
+ StringTable = i;
+ ++i;
+ } else {
+ Format = K_COFF;
+ if (i != e) {
+ SymbolTable = i;
+ ++i;
+ }
+ if (i != e) {
+ StringTable = i;
+ }
+ }
+ } else if (name == "__.SYMDEF") {
+ Format = K_BSD;
+ SymbolTable = i;
+ StringTable = e;
+ }
ec = object_error::success;
}
@@ -221,20 +266,45 @@ error_code Archive::Symbol::getName(StringRef &Result) const {
}
error_code Archive::Symbol::getMember(child_iterator &Result) const {
- const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart();
- uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- const char *offsets = buf + 4;
- buf += 4 + (member_count * 4); // Skip offsets.
- const char *indicies = buf + 4;
+ const char *Buf = Parent->SymbolTable->getBuffer()->getBufferStart();
+ const char *Offsets = Buf + 4;
+ uint32_t Offset = 0;
+ if (Parent->kind() == K_GNU) {
+ Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets)
+ + SymbolIndex);
+ } else if (Parent->kind() == K_BSD) {
+ llvm_unreachable("BSD format is not supported");
+ } else {
+ uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+
+ // Skip offsets.
+ Buf += sizeof(support::ulittle32_t)
+ + (MemberCount * sizeof(support::ulittle32_t));
+
+ uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+
+ if (SymbolIndex >= SymbolCount)
+ return object_error::parse_failed;
- uint16_t offsetindex =
- *(reinterpret_cast<const support::ulittle16_t*>(indicies)
- + SymbolIndex);
+ // Skip SymbolCount to get to the indices table.
+ const char *Indices = Buf + sizeof(support::ulittle32_t);
- uint32_t offset = *(reinterpret_cast<const support::ulittle32_t*>(offsets)
- + (offsetindex - 1));
+ // Get the index of the offset in the file member offset table for this
+ // symbol.
+ uint16_t OffsetIndex =
+ *(reinterpret_cast<const support::ulittle16_t*>(Indices)
+ + SymbolIndex);
+ // Subtract 1 since OffsetIndex is 1 based.
+ --OffsetIndex;
- const char *Loc = Parent->getData().begin() + offset;
+ if (OffsetIndex >= MemberCount)
+ return object_error::parse_failed;
+
+ Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
+ + OffsetIndex);
+ }
+
+ const char *Loc = Parent->getData().begin() + Offset;
size_t Size = sizeof(ArchiveMemberHeader) +
ToHeader(Loc)->getSize();
Result = Child(Parent, StringRef(Loc, Size));
@@ -253,10 +323,20 @@ Archive::Symbol Archive::Symbol::getNext() const {
Archive::symbol_iterator Archive::begin_symbols() const {
const char *buf = SymbolTable->getBuffer()->getBufferStart();
- uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (member_count * 4); // Skip offsets.
- uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (symbol_count * 2); // Skip indices.
+ if (kind() == K_GNU) {
+ uint32_t symbol_count = 0;
+ symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_BSD) {
+ llvm_unreachable("BSD archive format is not supported");
+ } else {
+ uint32_t member_count = 0;
+ uint32_t symbol_count = 0;
+ member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (symbol_count * 2); // Skip indices.
+ }
uint32_t string_start_offset =
buf - SymbolTable->getBuffer()->getBufferStart();
return symbol_iterator(Symbol(this, 0, string_start_offset));
@@ -264,9 +344,36 @@ Archive::symbol_iterator Archive::begin_symbols() const {
Archive::symbol_iterator Archive::end_symbols() const {
const char *buf = SymbolTable->getBuffer()->getBufferStart();
- uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
- buf += 4 + (member_count * 4); // Skip offsets.
- uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ uint32_t symbol_count = 0;
+ if (kind() == K_GNU) {
+ symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
+ } else if (kind() == K_BSD) {
+ llvm_unreachable("BSD archive format is not supported");
+ } else {
+ uint32_t member_count = 0;
+ member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ }
return symbol_iterator(
Symbol(this, symbol_count, 0));
}
+
+Archive::child_iterator Archive::findSym(StringRef name) const {
+ Archive::symbol_iterator bs = begin_symbols();
+ Archive::symbol_iterator es = end_symbols();
+ Archive::child_iterator result;
+
+ StringRef symname;
+ for (; bs != es; ++bs) {
+ if (bs->getName(symname))
+ return end_children();
+ if (symname == name) {
+ if (bs->getMember(result))
+ return end_children();
+ return result;
+ }
+ }
+ return end_children();
+}
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 4d489a88e5..4672554022 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -8,9 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements a hash set that can be used to remove duplication of
-// nodes in a graph. This code was originally created by Chris Lattner for use
-// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
-// set.
+// nodes in a graph.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c59ec19ecb..52bf722d53 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -20,7 +20,6 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case UnknownArch: return "unknown";
case arm: return "arm";
- case cellspu: return "cellspu";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@@ -43,6 +42,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case le32: return "le32";
case amdil: return "amdil";
case spir: return "spir";
+ case spir64: return "spir64";
}
llvm_unreachable("Invalid ArchType!");
@@ -56,8 +56,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case arm:
case thumb: return "arm";
- case cellspu: return "spu";
-
case ppc64:
case ppc: return "ppc";
@@ -85,6 +83,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case le32: return "le32";
case amdil: return "amdil";
case spir: return "spir";
+ case spir64: return "spir";
}
}
@@ -153,7 +152,6 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return StringSwitch<Triple::ArchType>(Name)
.Case("arm", arm)
- .Case("cellspu", cellspu)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@@ -177,6 +175,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("le32", le32)
.Case("amdil", amdil)
.Case("spir", spir)
+ .Case("spir64", spir64)
.Default(UnknownArch);
}
@@ -202,6 +201,7 @@ const char *Triple::getArchNameForAssembler() {
.Case("le32", "le32")
.Case("amdil", "amdil")
.Case("spir", "spir")
+ .Case("spir64", "spir64")
.Default(NULL);
}
@@ -220,7 +220,6 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.StartsWith("armv", Triple::arm)
.Case("thumb", Triple::thumb)
.StartsWith("thumbv", Triple::thumb)
- .Cases("spu", "cellspu", Triple::cellspu)
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -237,6 +236,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("le32", Triple::le32)
.Case("amdil", Triple::amdil)
.Case("spir", Triple::spir)
+ .Case("spir64", Triple::spir64)
.Default(Triple::UnknownArch);
}
@@ -650,7 +650,6 @@ void Triple::setOSAndEnvironmentName(StringRef Str) {
static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
switch (Arch) {
- case llvm::Triple::spir:
case llvm::Triple::UnknownArch:
return 0;
@@ -659,7 +658,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::amdil:
case llvm::Triple::arm:
- case llvm::Triple::cellspu:
case llvm::Triple::hexagon:
case llvm::Triple::le32:
case llvm::Triple::mblaze:
@@ -673,6 +671,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::thumb:
case llvm::Triple::x86:
case llvm::Triple::xcore:
+ case llvm::Triple::spir:
return 32;
case llvm::Triple::mips64:
@@ -681,6 +680,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::ppc64:
case llvm::Triple::sparcv9:
case llvm::Triple::x86_64:
+ case llvm::Triple::spir64:
return 64;
}
llvm_unreachable("Invalid architecture value");
@@ -709,7 +709,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::amdil:
case Triple::spir:
case Triple::arm:
- case Triple::cellspu:
case Triple::hexagon:
case Triple::le32:
case Triple::mblaze:
@@ -732,6 +731,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
+ case Triple::spir64: T.setArch(Triple::spir); break;
}
return T;
}
@@ -742,7 +742,6 @@ Triple Triple::get64BitArchVariant() const {
case Triple::UnknownArch:
case Triple::amdil:
case Triple::arm:
- case Triple::cellspu:
case Triple::hexagon:
case Triple::le32:
case Triple::mblaze:
@@ -754,7 +753,7 @@ Triple Triple::get64BitArchVariant() const {
T.setArch(UnknownArch);
break;
- case Triple::spir:
+ case Triple::spir64:
case Triple::mips64:
case Triple::mips64el:
case Triple::nvptx64:
@@ -770,6 +769,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::x86: T.setArch(Triple::x86_64); break;
+ case Triple::spir: T.setArch(Triple::spir64); break;
}
return T;
}
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 59c5ae5808..35e5120369 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -453,9 +453,10 @@ rety_open_create:
// If the file existed, try again, otherwise, error.
if (errno == errc::file_exists)
goto retry_random_path;
- // The path prefix doesn't exist.
- if (errno == errc::no_such_file_or_directory) {
- StringRef p(RandomPath.begin(), RandomPath.size());
+ // If path prefix doesn't exist, try to create it.
+ if (errno == errc::no_such_file_or_directory &&
+ !exists(path::parent_path(RandomPath))) {
+ StringRef p(RandomPath);
SmallString<64> dir_to_create;
for (path::const_iterator i = path::begin(p),
e = --path::end(p); i != e; ++i) {
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 34df636a72..2ae33f5b99 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -252,6 +252,7 @@ namespace yaml {
class Scanner {
public:
Scanner(const StringRef Input, SourceMgr &SM);
+ Scanner(MemoryBuffer *Buffer, SourceMgr &SM_);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
@@ -708,6 +709,21 @@ Scanner::Scanner(StringRef Input, SourceMgr &sm)
End = InputBuffer->getBufferEnd();
}
+Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_) // construct a scanner over an existing MemoryBuffer
+ : SM(SM_)
+ , InputBuffer(Buffer) // use the caller-supplied buffer as the input
+ , Current(InputBuffer->getBufferStart()) // cursor starts at the beginning of the buffer
+ , End(InputBuffer->getBufferEnd())
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , Failed(false) {
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc()); // register the buffer with the SourceMgr; NOTE(review): presumably SM takes ownership of Buffer here -- confirm
+}
+
Token &Scanner::peekNext() {
// If the current token is a possible simple key, keep parsing until we
// can confirm.
@@ -1532,6 +1548,10 @@ Stream::Stream(StringRef Input, SourceMgr &SM)
: scanner(new Scanner(Input, SM))
, CurrentDoc(0) {}
+Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM) // build a Stream directly from a MemoryBuffer
+ : scanner(new Scanner(InputBuffer, SM)) // forwards to the Scanner(MemoryBuffer *, SourceMgr &) constructor
+ , CurrentDoc(0) {}
+
Stream::~Stream() {}
bool Stream::failed() { return scanner->failed(); }
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 36819a0d78..2b2662e0a6 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
@@ -489,7 +490,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
}
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
@@ -1166,12 +1167,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
- MCInst BrInst;
- BrInst.setOpcode(ARM::t2B);
- BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
- BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- BrInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(BrInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B)
+ .addExpr(MBBSymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
continue;
}
// Otherwise it's an offset from the dispatch instruction. Construct an
@@ -1215,18 +1214,6 @@ void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps-2, OS);
}
-static void populateADROperands(MCInst &Inst, unsigned Dest,
- const MCSymbol *Label,
- unsigned pred, unsigned ccreg,
- MCContext &Ctx) {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
- Inst.addOperand(MCOperand::CreateReg(Dest));
- Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- // Add predicate operands.
- Inst.addOperand(MCOperand::CreateImm(pred));
- Inst.addOperand(MCOperand::CreateReg(ccreg));
-}
-
void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
@@ -1403,129 +1390,104 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::tLEApcrel:
case ARM::t2LEApcrel: {
// FIXME: Need to also handle globals and externals
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetCPISymbol(MI->getOperand(1).getIndex()),
- MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg()));
return;
}
case ARM::LEApcrelJT:
case ARM::tLEApcrelJT:
case ARM::t2LEApcrelJT: {
- MCInst TmpInst;
- TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
- : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
- : ARM::ADR));
- populateADROperands(TmpInst, MI->getOperand(0).getReg(),
- GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
- MI->getOperand(2).getImm()),
- MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
- OutContext);
- OutStreamer.EmitInstruction(TmpInst);
+ MCSymbol *JTIPICSymbol =
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm());
+ OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() ==
+ ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR))
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
case ARM::BX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
case ARM::tBX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::BMOVPCRX_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg()) // source is a register operand (was CreateReg before the MCInstBuilder conversion), not an immediate
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::BMOVPCB_CALL: {
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
+ .addReg(ARM::LR)
+ .addReg(ARM::PC)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::Bcc);
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
- const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
- TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ .addReg(0));
+
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
+ .addExpr(GVSymExpr)
// Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::MOVi16_ga_pcrel:
@@ -1613,15 +1575,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::tADDhirr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::PICADD: {
@@ -1636,17 +1596,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutContext));
// Form and emit the add.
- MCInst AddInst;
- AddInst.setOpcode(ARM::ADDrr);
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- // Add 's' bit operand (always reg0 for this)
- AddInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(AddInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg())
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
return;
}
case ARM::PICSTR:
@@ -1682,16 +1640,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
}
- MCInst LdStInst;
- LdStInst.setOpcode(Opcode);
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- LdStInst.addOperand(MCOperand::CreateImm(0));
- // Add predicate operands.
- LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
- OutStreamer.EmitInstruction(LdStInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ // Add predicate operands.
+ .addImm(MI->getOperand(3).getImm())
+ .addReg(MI->getOperand(4).getReg()));
return;
}
@@ -1743,29 +1699,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
}
case ARM::t2TBB_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
- TmpInst.setOpcode(ARM::t2TBB);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
// Output the data for the jump table itself
EmitJump2Table(MI);
// Make sure the next instruction is 2-byte aligned.
@@ -1774,15 +1727,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case ARM::t2TBH_JT: {
// Lower and emit the instruction itself, then the jump table following it.
- MCInst TmpInst;
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
- TmpInst.setOpcode(ARM::t2TBH);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
// Output the data for the jump table itself
EmitJump2Table(MI);
return;
@@ -1842,17 +1793,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::BR_JTadd: {
// Lower and emit the instruction itself, then the jump table following it.
// add pc, target, idx
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDrr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- // Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr)
+ .addReg(ARM::PC)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0)
+ // Add 's' bit operand (always reg0 for this)
+ .addReg(0));
// Output the data for the jump table itself
EmitJumpTable(MI);
@@ -1898,75 +1847,57 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
MCSymbol *Label = GetARMSJLJEHLabel();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tADDi3);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3)
+ .addReg(ValReg)
// 's' bit operand
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateImm(7));
+ .addReg(ARM::CPSR)
+ .addReg(ValReg)
+ .addImm(7)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tSTRi);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi)
+ .addReg(ValReg)
+ .addReg(SrcReg)
// The offset immediate is #4. The operand value is scaled by 4 for the
// tSTR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tB);
- TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVi8);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB)
+ .addExpr(SymbolExpr)
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8)
+ .addReg(ARM::R0)
+ .addReg(ARM::CPSR)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
OutStreamer.EmitLabel(Label);
return;
}
@@ -1982,69 +1913,53 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ValReg)
+ .addReg(ARM::PC)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp begin");
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::STRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ValReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12)
+ .addReg(ValReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::ADDri);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addReg(0));
+
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi)
+ .addReg(ARM::R0)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ .addImm(ARMCC::AL)
+ .addReg(0)
// 's' bit operand (always reg0 for this).
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("eh_setjmp end");
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addReg(0));
return;
}
case ARM::Int_eh_sjlj_longjmp: {
@@ -2054,48 +1969,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(8));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::SP)
+ .addReg(SrcReg)
+ .addImm(8)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(4));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(4)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::LDRi12);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::BX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
case ARM::tInt_eh_sjlj_longjmp: {
@@ -2106,60 +2008,44 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// bx $scratch
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ScratchReg = MI->getOperand(1).getReg();
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
// The offset immediate is #8. The operand value is scaled by 4 for the
// tLDR instruction.
- TmpInst.addOperand(MCOperand::CreateImm(2));
+ .addImm(2)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr)
+ .addReg(ARM::SP)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(1));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ScratchReg)
+ .addReg(SrcReg)
+ .addImm(1)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRi);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
- TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateImm(0));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi)
+ .addReg(ARM::R7)
+ .addReg(SrcReg)
+ .addImm(0)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
- {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBX);
- TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX)
+ .addReg(ScratchReg)
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- }
+ .addImm(ARMCC::AL)
+ .addReg(0));
return;
}
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index ee3604499f..b2d31244c5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -139,7 +139,7 @@ private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
- MCSymbol *GetARMSJLJEHLabel(void) const;
+ MCSymbol *GetARMSJLJEHLabel() const;
MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5280abb40c..323b022ad4 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1373,6 +1373,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
/// only return true if the base pointers are the same and the only differences
/// between the two addresses is the offset. It also returns the offsets by
/// reference.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const {
@@ -1447,6 +1450,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
+///
+/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
+/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const {
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 90ae94b3b2..8135d58aff 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -277,15 +277,16 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- // Form pairs of consecutive S, D, or Q registers.
- SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
- SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+ // Form pairs of consecutive R, S, D, or Q registers.
+ SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
+ SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
// Form sequences of 4 consecutive S, D, or Q registers.
- SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+ SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
@@ -1543,9 +1544,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-/// PairSRegs - Form a D register from a pair of S registers.
-///
-SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
+SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+}
+
+/// \brief Form a D register from a pair of S registers.
+SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
@@ -1555,9 +1566,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairDRegs - Form a quad register from a pair of D registers.
-///
-SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form a quad register from a pair of D registers.
+SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
@@ -1566,9 +1576,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
-///
-SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+/// \brief Form 4 consecutive D registers from a pair of Q registers.
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
@@ -1577,9 +1586,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
-/// QuadSRegs - Form 4 consecutive S registers.
-///
-SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive S registers.
+SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass =
@@ -1593,9 +1601,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadDRegs - Form 4 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive D registers.
+SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
@@ -1608,9 +1615,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
-/// QuadQRegs - Form 4 consecutive Q registers.
-///
-SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+/// \brief Form 4 consecutive Q registers.
+SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
@@ -1883,7 +1889,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V0 = N->getOperand(Vec0Idx + 0);
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
- SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
// If it's a vst3, form a quad D-register and leave the last part as
@@ -1891,13 +1897,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(Vec0Idx + 3);
- SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
} else {
// Form a QQ register.
SDValue Q0 = N->getOperand(Vec0Idx);
SDValue Q1 = N->getOperand(Vec0Idx + 1);
- SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
}
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
@@ -1939,7 +1945,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
- SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers. This is always an updating store, so that it
// provides the address to the second store for the odd subregs.
@@ -2049,18 +2055,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2) {
if (is64BitVector)
- SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
else
- SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(Vec0Idx + 3);
if (is64BitVector)
- SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
else
- SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
@@ -2186,7 +2192,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V0 = N->getOperand(FirstTblReg + 0);
SDValue V1 = N->getOperand(FirstTblReg + 1);
if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
// If it's a vtbl3, form a quad D-register and leave the last part as
@@ -2194,7 +2200,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
: N->getOperand(FirstTblReg + 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
}
SmallVector<SDValue, 6> Ops;
@@ -2510,7 +2516,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
- return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
@@ -2891,13 +2897,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
- return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
}
assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2)
- return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
- return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3));
}
@@ -3110,17 +3116,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Chain = N->getOperand(0);
- unsigned NewOpc = ARM::LDREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2LDREXD;
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD;
// arm_ldrexd returns a i64 value in {i32, i32}
std::vector<EVT> ResTys;
- ResTys.push_back(MVT::i32);
- ResTys.push_back(MVT::i32);
+ if (isThumb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ } else
+ ResTys.push_back(MVT::Untyped);
ResTys.push_back(MVT::Other);
- // place arguments in the right order
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
@@ -3133,30 +3141,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode ldrexd to always
- // use the pair [R0, R1] to hold the load result.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
- SDValue(Ld, 0), SDValue(0,0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
- SDValue(Ld, 1), Chain.getValue(1));
-
// Remap uses.
- SDValue Glue = Chain.getValue(1);
+ SDValue Glue = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
if (!SDValue(N, 0).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R0, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 0);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
+ Result = SDValue(ResNode,0);
+ Glue = Result.getValue(1);
+ }
ReplaceUses(SDValue(N, 0), Result);
}
if (!SDValue(N, 1).use_empty()) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R1, MVT::i32, Glue);
- Glue = Result.getValue(2);
+ SDValue Result;
+ if (isThumb)
+ Result = SDValue(Ld, 1);
+ else {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
+ SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
+ Result = SDValue(ResNode,0);
+ Glue = Result.getValue(1);
+ }
ReplaceUses(SDValue(N, 1), Result);
}
-
- ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
+ ReplaceUses(SDValue(N, 2), Glue);
return NULL;
}
@@ -3167,38 +3180,27 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Val1 = N->getOperand(3);
SDValue MemAddr = N->getOperand(4);
- // Until there's support for specifing explicit register constraints
- // like the use of even/odd register pair, hardcode strexd to always
- // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
- Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
- SDValue(0, 0));
- Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
-
- SDValue Glue = Chain.getValue(1);
- Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R2, MVT::i32, Glue);
- Glue = Val0.getValue(1);
- Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- ARM::R3, MVT::i32, Glue);
-
// Store exclusive double return a i32 value which is the return status
// of the issued store.
std::vector<EVT> ResTys;
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- // place arguments in the right order
+ bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
+ // Place arguments in the right order.
SmallVector<SDValue, 7> Ops;
- Ops.push_back(Val0);
- Ops.push_back(Val1);
+ if (isThumb) {
+ Ops.push_back(Val0);
+ Ops.push_back(Val1);
+ } else
+ // arm_strexd uses GPRPair.
+ Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
Ops.push_back(MemAddr);
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- unsigned NewOpc = ARM::STREXD;
- if (Subtarget->isThumb() && Subtarget->hasThumb2())
- NewOpc = ARM::t2STREXD;
+ unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
Ops.size());
@@ -3396,7 +3398,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Form a REG_SEQUENCE to force register allocation.
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
- SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
SmallVector<SDValue, 6> Ops;
Ops.push_back(RegSeq);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 752a492c61..e1f5aa3fed 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -525,6 +525,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
@@ -549,6 +553,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -6088,12 +6095,16 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// for ldrexd must be different.
BB = loopMBB;
// Load
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(ARM::R2, RegState::Define)
- .addReg(ARM::R3, RegState::Define).addReg(ptr));
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
// Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
if (IsCmpxchg) {
// Add early exit
@@ -6112,24 +6123,56 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// Copy to physregs for strexd
unsigned setlo = MI->getOperand(5).getReg();
unsigned sethi = MI->getOperand(6).getReg();
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi);
+ unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(undef)
+ .addReg(setlo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
+ .addReg(r1)
+ .addReg(sethi)
+ .addImm(ARM::gsub_1);
} else if (Op1) {
// Perform binary operation
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0)
+ unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
.addReg(destlo).addReg(vallo))
.addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1)
+ unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
.addReg(desthi).addReg(valhi)).addReg(0);
+
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(tmpRegLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
+ .addReg(r1)
+ .addReg(tmpRegHi)
+ .addImm(ARM::gsub_1);
} else {
// Copy to physregs for strexd
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(vallo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
+ .addReg(r1)
+ .addReg(valhi)
+ .addImm(ARM::gsub_1);
}
// Store
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr));
+ .addReg(GPRPair1).addReg(ptr));
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3302ec69a5..0deddc38fd 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -251,7 +251,7 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
- virtual unsigned getJumpTableEncoding(void) const;
+ virtual unsigned getJumpTableEncoding() const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9f7e50cd27..137d9598a0 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -430,6 +430,8 @@ def reglist : Operand<i32> {
let DecoderMethod = "DecodeRegListOperand";
}
+def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
+
def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; }
def dpr_reglist : Operand<i32> {
let EncoderMethod = "getRegisterListOpValue";
@@ -4374,8 +4376,8 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrex", "\t$Rt, $addr", []>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr),
- NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> {
+def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
}
@@ -4389,8 +4391,8 @@ def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
- NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3cf213cbff..697a8d28c5 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4877,12 +4877,15 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
-def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAQ, "vabs", "f32",
- v4f32, v4f32, int_arm_neon_vabs>;
+def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v2f32, v2f32, fabs>;
+def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+ "vabs", "f32",
+ v4f32, v4f32, fabs>;
+
+def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
+def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c2800acccd..f1eb83382d 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -130,8 +130,9 @@ def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
let ParserMatchClass = imm0_4095_neg_asmoperand;
}
-def imm0_255_neg : PatLeaf<(i32 imm), [{
- return (uint32_t)(-N->getZExtValue()) < 255;
+def imm1_255_neg : PatLeaf<(i32 imm), [{
+ uint32_t Val = -N->getZExtValue();
+ return (Val > 0 && Val < 255);
}], imm_neg_XFORM>;
def imm0_255_not : PatLeaf<(i32 imm), [{
@@ -1928,8 +1929,8 @@ defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// The AddedComplexity preferences the first variant over the others since
// it can be shrunk to a 16-bit wide encoding, while the others cannot.
let AddedComplexity = 1 in
-def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
- (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm1_255_neg:$imm),
+ (t2SUBri GPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
@@ -1938,8 +1939,8 @@ def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
(t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
let AddedComplexity = 1 in
-def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
- (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index adbe5fb943..3c6a72bdb5 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -10,10 +10,12 @@
#include "ARMTargetObjectFile.h"
#include "ARMSubtarget.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -40,3 +42,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCExpr *ARMElfTargetObjectFile::
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
+
+ return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ MCSymbolRefExpr::VK_ARM_TARGET2,
+ getContext());
+}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261439..7f60727e53 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -28,6 +28,11 @@ public:
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ const MCExpr *
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
+
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c61e3bd99d..7383aa21ce 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5187,6 +5187,45 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
}
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when parsing from asm, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
+ bool isLoad = (Mnemonic == "ldrexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
+ ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+
+ const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
+ MRC.contains(Op2->getReg())) {
+ unsigned Reg1 = Op1->getReg();
+ unsigned Reg2 = Op2->getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ Error(Op2->getStartLoc(), isLoad ?
+ "destination operands must be sequential" :
+ "source operands must be sequential");
+ return true;
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
+ &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
+ Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
+ NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
+ delete Op1;
+ delete Op2;
+ }
+ }
+
return false;
}
@@ -5274,8 +5313,7 @@ validateInstruction(MCInst &Inst,
switch (Inst.getOpcode()) {
case ARM::LDRD:
case ARM::LDRD_PRE:
- case ARM::LDRD_POST:
- case ARM::LDREXD: {
+ case ARM::LDRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
@@ -5294,8 +5332,7 @@ validateInstruction(MCInst &Inst,
return false;
}
case ARM::STRD_PRE:
- case ARM::STRD_POST:
- case ARM::STREXD: {
+ case ARM::STRD_POST: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
@@ -7483,6 +7520,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
+
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index beeabb6d42..75de07e7a9 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -317,6 +317,35 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
+ // Combine 2 GPRs from disassembler into a GPRPair to match with instr def.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when decoding them, the two GPRs cannot be automatically
+ // expressed as a GPRPair, so we have to manually merge them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ bool isStore = Opcode == ARM::STREXD;
+ unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
+ if (MRC.contains(Reg)) {
+ MCInst NewMI;
+ MCOperand NewReg;
+ NewMI.setOpcode(Opcode);
+
+ if (isStore)
+ NewMI.addOperand(MI->getOperand(0));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
+ &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewMI.addOperand(NewReg);
+
+ // Copy the remaining operands into NewMI.
+ for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ NewMI.addOperand(MI->getOperand(i));
+ printInstruction(&NewMI, O);
+ return;
+ }
+ }
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -756,6 +785,15 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
O << "}";
}
+void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
+ O << ", ";
+ printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
+}
+
+
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index b7bab5fdcd..edff75d886 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -124,6 +124,7 @@ public:
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 8abf449206..f2c4233a8a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -115,6 +115,10 @@ public:
MCValue &Target, uint64_t &Value,
bool &IsResolved);
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const;
bool fixupNeedsRelaxation(const MCFixup &Fixup,
@@ -561,83 +565,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
-namespace {
-
-// FIXME: This should be in a separate file.
-// ELF is an ELF of course...
-class ELFARMAsmBackend : public ARMAsmBackend {
-public:
- uint8_t OSABI;
- Triple::OSType OSType; // @LOCALMOD: kept OSTYPE vs upstream. FIXME: remove.
- ELFARMAsmBackend(const Target &T, const StringRef TT,
- uint8_t _OSABI,
- Triple::OSType _OSType)
- : ARMAsmBackend(T, TT), OSABI(_OSABI), OSType(_OSType) { }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- // @LOCALMOD-BEGIN
- // FIXME! NaCl should INHERIT from ELFARMAsmBackend, not
- // add to it.
- unsigned getBundleSize() const {
- return (OSType == Triple::NativeClient) ? 16 : 0;
- }
-
- bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const {
- if (OSType == Triple::NativeClient) {
- return CustomExpandInstNaClARM(Inst, Out);
- }
- return false;
- }
-
- // @LOCALMOD-END
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMELFObjectWriter(OS, OSABI);
- }
-};
-
-// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
- unsigned NumBytes = 4; // FIXME: 2 for Thumb
- Value = adjustFixupValue(Fixup, Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
-// FIXME: This should be in a separate file.
-class DarwinARMAsmBackend : public ARMAsmBackend {
-public:
- const object::mach::CPUSubtypeARM Subtype;
- DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
- : ARMAsmBackend(T, TT), Subtype(st) {
- HasDataInCodeSupport = true;
- }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
- Subtype);
- }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- return false;
- }
-};
-
/// getFixupKindNumBytes - The number of bytes the fixup may change.
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
@@ -686,8 +613,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value) const {
+void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup, Value);
if (!Value) return; // Doesn't change encoding.
@@ -695,12 +622,66 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned Offset = Fixup.getOffset();
assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
- // For each byte of the fragment that the fixup touches, mask in the
- // bits from the fixup value.
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
for (unsigned i = 0; i != NumBytes; ++i)
Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ uint8_t OSABI;
+ ELFARMAsmBackend(const Target &T, const StringRef TT,
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMELFObjectWriter(OS, OSABI);
+ }
+};
+
+// @LOCALMOD-BEGIN
+class NaClARMAsmBackend : public ELFARMAsmBackend {
+ public:
+ NaClARMAsmBackend(const Target &T, const StringRef TT,
+ uint8_t OSABI)
+ : ELFARMAsmBackend(T, TT, OSABI) { }
+ unsigned getBundleSize() const {
+ return 16;
+ }
+
+ bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const {
+ return CustomExpandInstNaClARM(Inst, Out);
+ }
+};
+// @LOCALMOD-END
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, const StringRef TT,
+ object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T, TT), Subtype(st) {
+ HasDataInCodeSupport = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
} // end anonymous namespace
MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
@@ -732,5 +713,7 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef
assert(0 && "Windows not supported on ARM");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFARMAsmBackend(T, TT, OSABI, TheTriple.getOS());
+ if (TheTriple.getOS() == llvm::Triple::NativeClient)
+ return new NaClARMAsmBackend(T, TT, OSABI);
+ return new ELFARMAsmBackend(T, TT, OSABI);
}
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
deleted file mode 100644
index 1f8ca8681c..0000000000
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS SPU.td)
-
-tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter)
-tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv)
-add_public_tablegen_target(CellSPUCommonTableGen)
-
-add_llvm_target(CellSPUCodeGen
- SPUAsmPrinter.cpp
- SPUHazardRecognizers.cpp
- SPUInstrInfo.cpp
- SPUISelDAGToDAG.cpp
- SPUISelLowering.cpp
- SPUFrameLowering.cpp
- SPUMachineFunction.cpp
- SPURegisterInfo.cpp
- SPUSubtarget.cpp
- SPUTargetMachine.cpp
- SPUSelectionDAGInfo.cpp
- SPUNopFiller.cpp
- )
-
-add_dependencies(LLVMCellSPUCodeGen intrinsics_gen)
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
deleted file mode 100644
index cdb4099ffb..0000000000
--- a/lib/Target/CellSPU/CellSDKIntrinsics.td
+++ /dev/null
@@ -1,449 +0,0 @@
-//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-///--==-- Arithmetic ops intrinsics --==--
-def CellSDKah:
- RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
-def CellSDKahi:
- RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
-def CellSDKa:
- RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
-def CellSDKai:
- RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
-def CellSDKsfh:
- RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
-def CellSDKsfhi:
- RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
-def CellSDKsf:
- RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
-def CellSDKsfi:
- RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
-def CellSDKaddx:
- RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
-def CellSDKcg:
- RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
-def CellSDKcgx:
- RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
-def CellSDKsfx:
- RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
-def CellSDKbg:
- RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
-def CellSDKbgx:
- RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
-
-def CellSDKmpy:
- RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpy $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyu:
- RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyu $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))] >;
-
-def CellSDKmpyi:
- RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyi $rT, $rA, $val", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))]>;
-
-def CellSDKmpyui:
- RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "mpyui $rT, $rA, $val", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))]>;
-
-def CellSDKmpya:
- RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB),
- (v8i16 VECREG:$rC)))]>;
-
-def CellSDKmpyh:
- RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyh $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpys:
- RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpys $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhh:
- RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhh $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhha:
- RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhha $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
-// as an intrinsic for the time being
-def CellSDKmpyhhu:
- RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKmpyhhau:
- RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
- [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def CellSDKand:
- RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "and\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKandc:
- RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "andc\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKandbi:
- RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "andbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKandhi:
- RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "andhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKandi:
- RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "andi\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "or\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKorc:
- RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "addc\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "orbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKorhi:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "orhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKori:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ori\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKxor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "xor\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKxorbi:
- RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "xorbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKxorhi:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "xorhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKxori:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "xori\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKnor:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "nor\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKnand:
- RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "nand\t $rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-//===----------------------------------------------------------------------===//
-// Shift/rotate intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKshli:
- Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
- (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def CellSDKshlqbi:
- Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
- (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
-
-def CellSDKshlqii:
- Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
- (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def CellSDKshlqby:
- Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
- (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
-
-def CellSDKshlqbyi:
- Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
- (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-
-//===----------------------------------------------------------------------===//
-// Branch/compare intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKceq:
- RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceq\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKceqi:
- RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ceqi\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKceqb:
- RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceqb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKceqbi:
- RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "ceqbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKceqh:
- RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ceqh\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKceqhi:
- RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ceqhi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-def CellSDKcgth:
- RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgth\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKcgthi:
- RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "cgthi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKcgt:
- RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgt\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKcgti:
- RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "cgti\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKcgtb:
- RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "cgtb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKcgtbi:
- RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "cgtbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-def CellSDKclgth:
- RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgth\t $rT, $rA, $rB", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
-
-def CellSDKclgthi:
- RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "clgthi\t $rT, $rA, $val", BranchResolv,
- [(set (v8i16 VECREG:$rT),
- (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
-
-def CellSDKclgt:
- RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgt\t $rT, $rA, $rB", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
-def CellSDKclgti:
- RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "clgti\t $rT, $rA, $val", BranchResolv,
- [(set (v4i32 VECREG:$rT),
- (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
-
-def CellSDKclgtb:
- RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "clgtb\t $rT, $rA, $rB", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
-
-def CellSDKclgtbi:
- RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
- "clgtbi\t $rT, $rA, $val", BranchResolv,
- [(set (v16i8 VECREG:$rT),
- (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
-
-//===----------------------------------------------------------------------===//
-// Floating-point intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKfa:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fa\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfs:
- RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fs\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfm:
- RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfceq:
- RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fceq\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcgt:
- RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcgt\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcmeq:
- RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcmeq\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfcmgt:
- RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fcmgt\t $rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def CellSDKfma:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-def CellSDKfnms:
- RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-def CellSDKfms:
- RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t $rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rC)))]>;
-
-//===----------------------------------------------------------------------===//
-// Double precision floating-point intrinsics:
-//===----------------------------------------------------------------------===//
-
-def CellSDKdfa:
- RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfa\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfs:
- RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfs\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfm:
- RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfm\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfma:
- RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfma\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfnma:
- RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfnma\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfnms:
- RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfnms\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
-
-def CellSDKdfms:
- RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfms\t $rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))]>;
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
deleted file mode 100644
index 277620bf4e..0000000000
--- a/lib/Target/CellSPU/LLVMBuild.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[common]
-subdirectories = MCTargetDesc TargetInfo
-
-[component_0]
-type = TargetGroup
-name = CellSPU
-parent = Target
-has_asmprinter = 1
-
-[component_1]
-type = Library
-name = CellSPUCodeGen
-parent = CellSPU
-required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
-add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 0027bdbf6c..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_llvm_library(LLVMCellSPUDesc
- SPUMCTargetDesc.cpp
- SPUMCAsmInfo.cpp
- )
-
-add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
deleted file mode 100644
index 71e5bbc629..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = CellSPUDesc
-parent = CellSPU
-required_libraries = CellSPUInfo MC
-add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/CellSPU/MCTargetDesc/Makefile
deleted file mode 100644
index 10d9a42239..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
deleted file mode 100644
index 4bad37eaca..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the SPUMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMCAsmInfo.h"
-using namespace llvm;
-
-void SPULinuxMCAsmInfo::anchor() { }
-
-SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
- IsLittleEndian = false;
-
- ZeroDirective = "\t.space\t";
- Data64bitsDirective = "\t.quad\t";
- AlignmentIsInBytes = false;
-
- PCSymbol = ".";
- CommentString = "#";
- GlobalPrefix = "";
- PrivateGlobalPrefix = ".L";
-
- // Has leb128
- HasLEB128 = true;
-
- SupportsDebugInformation = true;
-
- // Exception handling is not supported on CellSPU (think about it: you only
- // have 256K for code+data. Would you support exception handling?)
- ExceptionsType = ExceptionHandling::None;
-
- // SPU assembly requires ".section" before ".bss"
- UsesELFSectionDirectiveForBSS = true;
-}
-
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
deleted file mode 100644
index f786147b92..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the SPUMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUTARGETASMINFO_H
-#define SPUTARGETASMINFO_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
-
- class SPULinuxMCAsmInfo : public MCAsmInfo {
- virtual void anchor();
- public:
- explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
- };
-} // namespace llvm
-
-#endif /* SPUTARGETASMINFO_H */
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
deleted file mode 100644
index 8450e2c663..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Cell SPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMCTargetDesc.h"
-#include "SPUMCAsmInfo.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "SPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "SPUGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "SPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createSPUMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitSPUMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitSPUMCRegisterInfo(X, SPU::R0);
- return X;
-}
-
-static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitSPUMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
- MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
-
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- MAI->addInitialFrameState(0, Dst, Src);
-
- return MAI;
-}
-
-static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
- createSPUMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
- createCellSPUMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
- createSPUMCSubtargetInfo);
-}
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
deleted file mode 100644
index d26449e890..0000000000
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides CellSPU specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUMCTARGETDESC_H
-#define SPUMCTARGETDESC_H
-
-namespace llvm {
-class Target;
-
-extern Target TheCellSPUTarget;
-
-} // End llvm namespace
-
-// Define symbolic names for Cell registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "SPUGenRegisterInfo.inc"
-
-// Defines symbolic names for the SPU instructions.
-//
-#define GET_INSTRINFO_ENUM
-#include "SPUGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "SPUGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
deleted file mode 100644
index d7a8247f57..0000000000
--- a/lib/Target/CellSPU/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMCellSPUCodeGen
-TARGET = SPU
-BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \
- SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
- SPUGenDAGISel.inc \
- SPUGenSubtargetInfo.inc SPUGenCallingConv.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
deleted file mode 100644
index 3bce9609bf..0000000000
--- a/lib/Target/CellSPU/README.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-//===- README.txt - Notes for improving CellSPU-specific code gen ---------===//
-
-This code was contributed by a team from the Computer Systems Research
-Department in The Aerospace Corporation:
-
-- Scott Michel (head bottle washer and much of the non-floating point
- instructions)
-- Mark Thomas (floating point instructions)
-- Michael AuYeung (intrinsics)
-- Chandler Carruth (LLVM expertise)
-- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
-
-Some minor fixes added by Kalle Raiskila.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
-OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES
-OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING
-OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT
-LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR
-REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL,
-OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR
-SUCH DAMAGES ARE FORESEEABLE.
-
----------------------------------------------------------------------------
---WARNING--:
---WARNING--: The CellSPU work is work-in-progress and "alpha" quality code.
---WARNING--:
-
-If you are brave enough to try this code or help to hack on it, be sure
-to add 'spu' to configure's --enable-targets option, e.g.:
-
- ./configure <your_configure_flags_here> \
- --enable-targets=x86,x86_64,powerpc,spu
-
----------------------------------------------------------------------------
-
-TODO:
-* In commit r142152 vector legalization was set to element promotion per
- default. This breaks half vectors (e.g. v2i32) badly as they get element
- promoted to much slower types (v2i64).
-
-* Many CellSPU specific codegen tests only grep & count the number of
- instructions, not checking their place with FileCheck. There have also
- been some commits that change the CellSPU checks, some of which might
- have not been thoroughly scrutinized w.r.t. to the changes they cause in SPU
- assembly. (especially since about the time of r142152)
-
-* Some of the i64 math have huge tablegen rules, which sometime cause
- tablegen to run out of memory. See e.g. bug 8850. i64 arithmetics
- should probably be done with libraries.
-
-* Create a machine pass for performing dual-pipeline scheduling specifically
- for CellSPU, and insert branch prediction instructions as needed.
-
-* i32 instructions:
-
- * i32 division (work-in-progress)
-
-* i64 support (see i64operations.c test harness):
-
- * shifts and comparison operators: done
- * sign and zero extension: done
- * addition: done
- * subtraction: needed
- * multiplication: done
-
-* i128 support:
-
- * zero extension, any extension: done
- * sign extension: done
- * arithmetic operators (add, sub, mul, div): needed
- * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
-
- * or: done
-
-* f64 support
-
- * Comparison operators:
- SETOEQ unimplemented
- SETOGT unimplemented
- SETOGE unimplemented
- SETOLT unimplemented
- SETOLE unimplemented
- SETONE unimplemented
- SETO done (lowered)
- SETUO done (lowered)
- SETUEQ unimplemented
- SETUGT unimplemented
- SETUGE unimplemented
- SETULT unimplemented
- SETULE unimplemented
- SETUNE unimplemented
-
-* LLVM vector suport
-
- * VSETCC needs to be implemented. It's pretty straightforward to code, but
- needs implementation.
-
-* Intrinsics
-
- * spu.h instrinsics added but not tested. Need to have an operational
- llvm-spu-gcc in order to write a unit test harness.
-
-===-------------------------------------------------------------------------===
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
deleted file mode 100644
index c660131706..0000000000
--- a/lib/Target/CellSPU/SPU.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Cell SPU back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_IBMCELLSPU_H
-#define LLVM_TARGET_IBMCELLSPU_H
-
-#include "MCTargetDesc/SPUMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SPUTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
- FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
-
-}
-
-#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
deleted file mode 100644
index e835b9cac8..0000000000
--- a/lib/Target/CellSPU/SPU.td
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the top level entry point for the STI Cell SPU target machine.
-//
-//===----------------------------------------------------------------------===//
-
-// Get the target-independent interfaces which we are implementing.
-//
-include "llvm/Target/Target.td"
-
-// Holder of code fragments (you'd think this'd already be in
-// a td file somewhere... :-)
-
-class CodeFrag<dag frag> {
- dag Fragment = frag;
-}
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "SPURegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction formats, instructions
-//===----------------------------------------------------------------------===//
-
-include "SPUNodes.td"
-include "SPUOperands.td"
-include "SPUSchedule.td"
-include "SPUInstrFormats.td"
-include "SPUInstrInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget features:
-//===----------------------------------------------------------------------===//
-
-def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
-def LargeMemFeature:
- SubtargetFeature<"large_mem","UseLargeMem", "true",
- "Use large (>256) LSA memory addressing [default = false]">;
-
-def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
-
-//===----------------------------------------------------------------------===//
-// Calling convention:
-//===----------------------------------------------------------------------===//
-
-include "SPUCallingConv.td"
-
-// Target:
-
-def SPUInstrInfo : InstrInfo {
- let isLittleEndianEncoding = 1;
-}
-
-def SPU : Target {
- let InstructionSet = SPUInstrInfo;
-}
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
deleted file mode 100644
index e051e04733..0000000000
--- a/lib/Target/CellSPU/SPU128InstrInfo.td
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===//
-//
-// Cell SPU 128-bit operations
-//
-//===----------------------------------------------------------------------===//
-
-// zext 32->128: Zero extend 32-bit to 128-bit
-def : Pat<(i128 (zext R32C:$rSrc)),
- (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
-
-// zext 64->128: Zero extend 64-bit to 128-bit
-def : Pat<(i128 (zext R64C:$rSrc)),
- (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
-
-// zext 16->128: Zero extend 16-bit to 128-bit
-def : Pat<(i128 (zext R16C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
-
-// zext 8->128: Zero extend 8-bit to 128-bit
-def : Pat<(i128 (zext R8C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
-
-// anyext 32->128: Zero extend 32-bit to 128-bit
-def : Pat<(i128 (anyext R32C:$rSrc)),
- (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
-
-// anyext 64->128: Zero extend 64-bit to 128-bit
-def : Pat<(i128 (anyext R64C:$rSrc)),
- (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
-
-// anyext 16->128: Zero extend 16-bit to 128-bit
-def : Pat<(i128 (anyext R16C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
-
-// anyext 8->128: Zero extend 8-bit to 128-bit
-def : Pat<(i128 (anyext R8C:$rSrc)),
- (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
-
-// Shift left
-def : Pat<(shl GPRC:$rA, R32C:$rB),
- (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
deleted file mode 100644
index bea33b5362..0000000000
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ /dev/null
@@ -1,408 +0,0 @@
-//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
-//
-// Cell SPU 64-bit operations
-//
-//===----------------------------------------------------------------------===//
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// 64-bit comparisons:
-//
-// 1. The instruction sequences for vector vice scalar differ by a
-// constant. In the scalar case, we're only interested in the
-// top two 32-bit slots, whereas we're interested in an exact
-// all-four-slot match in the vector case.
-//
-// 2. There are no "immediate" forms, since loading 64-bit constants
-// could be a constant pool load.
-//
-// 3. i64 setcc results are i32, which are subsequently converted to a FSM
-// mask when used in a select pattern.
-//
-// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
-// [Note: this may be moot, since gb produces v4i32 or r32.]
-//
-// 5. The code sequences for r64 and v2i64 are probably overly conservative,
-// compared to the code that gcc produces.
-//
-// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// selb instruction definition for i64. Note that the selection mask is
-// a vector, produced by various forms of FSM:
-def SELBr64_cond:
- SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
- [/* no pattern */]>;
-
-// The generic i64 select pattern, which assumes that the comparison result
-// is in a 32-bit register that contains a select mask pattern (i.e., gather
-// bits result):
-
-def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
-
-// select the negative condition:
-class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
- Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
- (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
-
-// setcc the negative condition:
-class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
- Pat<(cond R64C:$rA, R64C:$rB),
- (XORIr32 compare.Fragment, -1)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// The i64 seteq fragment that does the scalar->vector conversion and
-// comparison:
-def CEQr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
-
-// The i64 seteq fragment that does the vector comparison
-def CEQv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
-
-// i64 seteq (equality): the setcc result is i32, which is converted to a
-// vector FSM mask when used in a select pattern.
-//
-// v2i64 seteq (equality): the setcc result is v4i32
-multiclass CompareEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CEQr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
-}
-
-defm I64EQ: CompareEqual64;
-
-def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
-def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
-
-// i64 setne:
-def : I64SETCCNegCond<setne, I64EQr64>;
-def : I64SELECTNegCond<setne, I64EQr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setugt/setule:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CLGTr64ugt:
- CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CLGTr64eq:
- CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CLGTr64compare:
- CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
- (XSWDv2i64 CLGTr64ugt.Fragment),
- CLGTr64eq.Fragment)>;
-
-def CLGTv2i64ugt:
- CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CLGTv2i64eq:
- CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CLGTv2i64compare:
- CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
- (XSWDv2i64 CLGTr64ugt.Fragment),
- CLGTv2i64eq.Fragment)>;
-
-multiclass CompareLogicalGreaterThan64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
-}
-
-defm I64LGT: CompareLogicalGreaterThan64;
-
-def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
-//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-// I64LGTv2i64.Fragment>;
-
-// i64 setult:
-def : I64SETCCNegCond<setule, I64LGTr64>;
-def : I64SELECTNegCond<setule, I64LGTr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setuge/setult:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CLGEr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
- CLGTr64eq.Fragment)), 0xb)>;
-
-def CLGEv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
- CLGTv2i64eq.Fragment)), 0xf)>;
-
-multiclass CompareLogicalGreaterEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
-}
-
-defm I64LGE: CompareLogicalGreaterEqual64;
-
-def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
-def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
- I64LGEv2i64.Fragment>;
-
-
-// i64 setult:
-def : I64SETCCNegCond<setult, I64LGEr64>;
-def : I64SELECTNegCond<setult, I64LGEr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setgt/setle:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CGTr64sgt:
- CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CGTr64eq:
- CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
-
-def CGTr64compare:
- CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
- (XSWDv2i64 CGTr64sgt.Fragment),
- CGTr64eq.Fragment)>;
-
-def CGTv2i64sgt:
- CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CGTv2i64eq:
- CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
-
-def CGTv2i64compare:
- CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
- (XSWDv2i64 CGTr64sgt.Fragment),
- CGTv2i64eq.Fragment)>;
-
-multiclass CompareGreaterThan64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CGTr64compare.Fragment), R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
- (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
-}
-
-defm I64GT: CompareLogicalGreaterThan64;
-
-def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
-//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-// I64GTv2i64.Fragment>;
-
-// i64 setult:
-def : I64SETCCNegCond<setle, I64GTr64>;
-def : I64SELECTNegCond<setle, I64GTr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// i64 setge/setlt:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def CGEr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
- CGTr64eq.Fragment)), 0xb)>;
-
-def CGEv2i64compare:
- CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
- CGTv2i64eq.Fragment)), 0xf)>;
-
-multiclass CompareGreaterEqual64 {
- // Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
- def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
-
- // SELB mask from FSM:
- def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
- def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
-}
-
-defm I64GE: CompareGreaterEqual64;
-
-def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
-def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
- I64GEv2i64.Fragment>;
-
-// i64 setult:
-def : I64SETCCNegCond<setlt, I64GEr64>;
-def : I64SELECTNegCond<setlt, I64GEr64>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 add
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_add_cg<dag lhs, dag rhs>:
- CodeFrag<(CGv4i32 lhs, rhs)>;
-
-class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
- CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
-
-class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
- v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
-
-def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_add<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 subtraction
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
-
-class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
- CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
-
-def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS
- v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_sub<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB),
- v2i64_sub_bg<(v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v2i64, i64 multiply
-//
-// Note: i64 multiply is simply the vector->scalar conversion of the
-// full-on v2i64 multiply, since the entire vector has to be manipulated
-// anyway.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class v2i64_mul_ahi64<dag rA> :
- CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
-
-class v2i64_mul_bhi64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
-
-class v2i64_mul_alo64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
-
-class v2i64_mul_blo64<dag rB> :
- CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
-
-class v2i64_mul_ashlq2<dag rA>:
- CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
-
-class v2i64_mul_ashlq4<dag rA>:
- CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
-
-class v2i64_mul_bshlq2<dag rB> :
- CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
-
-class v2i64_mul_bshlq4<dag rB> :
- CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
-
-class v2i64_highprod<dag rA, dag rB>:
- CodeFrag<(Av4i32
- (Av4i32
- (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
- v2i64_mul_ahi64<rA>.Fragment),
- (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
- v2i64_mul_bshlq4<rB>.Fragment)),
- (Av4i32
- (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
- v2i64_mul_ashlq4<rA>.Fragment),
- (Av4i32
- (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
- v2i64_mul_bhi64<rB>.Fragment),
- (Av4i32
- (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
- v2i64_mul_bhi64<rB>.Fragment),
- (Av4i32
- (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment),
- (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment))))))>;
-
-class v2i64_mul_a3_b3<dag rA, dag rB>:
- CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
- v2i64_mul_blo64<rB>.Fragment)>;
-
-class v2i64_mul_a2_b3<dag rA, dag rB>:
- CodeFrag<(SELBv4i32 (SHLQBYIv4i32
- (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
- v2i64_mul_bshlq2<rB>.Fragment), 0x2),
- (ILv4i32 0),
- (FSMBIv4i32 0xc3c3))>;
-
-class v2i64_mul_a3_b2<dag rA, dag rB>:
- CodeFrag<(SELBv4i32 (SHLQBYIv4i32
- (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
- v2i64_mul_ashlq2<rA>.Fragment), 0x2),
- (ILv4i32 0),
- (FSMBIv4i32 0xc3c3))>;
-
-class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
- v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
- v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
- v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
-
-class v2i64_mul<dag rA, dag rB, dag rCGmask>:
- v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
- (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
- (ILv4i32 0),
- (FSMBIv4i32 0x0f0f)),
- rCGmask>;
-
-def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
- (COPY_TO_REGCLASS R64C:$rB, VECREG),
- (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
-
-def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)),
- v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// f64 comparisons
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// selb instruction definition for i64. Note that the selection mask is
-// a vector, produced by various forms of FSM:
-def SELBf64_cond:
- SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
- [(set R64FP:$rT,
- (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
deleted file mode 100644
index 3396e8b1ef..0000000000
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ /dev/null
@@ -1,333 +0,0 @@
-//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to Cell SPU assembly language. This printer
-// is the output mechanism used by `llc'.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asmprinter"
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class SPUAsmPrinter : public AsmPrinter {
- public:
- explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
- AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "STI CBEA SPU Assembly Printer";
- }
-
- /// printInstruction - This method is automatically generated by tablegen
- /// from the instruction set description.
- void printInstruction(const MachineInstr *MI, raw_ostream &OS);
- static const char *getRegisterName(unsigned RegNo);
-
-
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- void printOp(const MachineOperand &MO, raw_ostream &OS);
-
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- if (MO.isReg()) {
- O << getRegisterName(MO.getReg());
- } else if (MO.isImm()) {
- O << MO.getImm();
- } else {
- printOp(MO, O);
- }
- }
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
-
-
- void
- printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- unsigned int value = MI->getOperand(OpNo).getImm();
- assert(value < (1 << 8) && "Invalid u7 argument");
- O << value;
- }
-
- void
- printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- char value = MI->getOperand(OpNo).getImm();
- O << (int) value;
- O << "(";
- printOperand(MI, OpNo+1, O);
- O << ")";
- }
-
- void
- printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (short) MI->getOperand(OpNo).getImm();
- }
-
- void
- printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (unsigned short)MI->getOperand(OpNo).getImm();
- }
-
- void
- printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // When used as the base register, r0 reads constant zero rather than
- // the value contained in the register. For this reason, the darwin
- // assembler requires that we print r0 as 0 (no r) when used as the base.
- const MachineOperand &MO = MI->getOperand(OpNo);
- O << getRegisterName(MO.getReg()) << ", ";
- printOperand(MI, OpNo+1, O);
- }
-
- void
- printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- unsigned int value = MI->getOperand(OpNo).getImm();
- assert(value <= (1 << 19) - 1 && "Invalid u18 argument");
- O << value;
- }
-
- void
- printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- short value = MI->getOperand(OpNo).getImm();
- assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
- && "Invalid s10 argument");
- O << value;
- }
-
- void
- printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- short value = MI->getOperand(OpNo).getImm();
- assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
- O << value;
- }
-
- void
- printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- assert(MI->getOperand(OpNo).isImm() &&
- "printDFormAddr first operand is not immediate");
- int64_t value = int64_t(MI->getOperand(OpNo).getImm());
- int16_t value16 = int16_t(value);
- assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
- && "Invalid dform s10 offset argument");
- O << (value16 & ~0xf) << "(";
- printOperand(MI, OpNo+1, O);
- O << ")";
- }
-
- void
- printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- /* Note: operand 1 is an offset or symbol name. */
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- if (MI->getOperand(OpNo+1).isImm()) {
- int displ = int(MI->getOperand(OpNo+1).getImm());
- if (displ > 0)
- O << "+" << displ;
- else if (displ < 0)
- O << displ;
- }
- }
- }
-
- void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // Used to generate a ".-<target>", but it turns out that the assembler
- // really wants the target.
- //
- // N.B.: This operand is used for call targets. Branch hints are another
- // animal entirely.
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- O << "@h";
- }
- }
-
- void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- printOp(MI->getOperand(OpNo), O);
- O << "@l";
- }
- }
-
- /// Print local store address
- void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printOp(MI->getOperand(OpNo), O);
- }
-
- void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- int value = (int) MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value < 16)
- && "Invalid negated immediate rotate 7-bit argument");
- O << -value;
- } else {
- llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
- }
- }
-
- void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
- assert(MI->getOperand(OpNo).isImm() &&
- "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
- int value = (int) MI->getOperand(OpNo).getImm();
- assert((value >= 0 && value <= 32)
- && "Invalid negated immediate rotate 7-bit argument");
- O << -value;
- }
- };
-} // end of anonymous namespace
-
-// Include the auto-generated portion of the assembly writer
-#include "SPUGenAsmWriter.inc"
-
-void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate:
- report_fatal_error("printOp() does not handle immediate values");
-
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
- case MachineOperand::MO_ExternalSymbol:
- // Computing the address of an external symbol, not calling it.
- if (TM.getRelocationModel() != Reloc::Static) {
- O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
- << "$non_lazy_ptr";
- return;
- }
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- return;
- case MachineOperand::MO_GlobalAddress:
- // External or weakly linked global variables need non-lazily-resolved
- // stubs
- if (TM.getRelocationModel() != Reloc::Static) {
- const GlobalValue *GV = MO.getGlobal();
- if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
- GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
- O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- return;
- }
- }
- O << *Mang->getSymbol(MO.getGlobal());
- return;
- case MachineOperand::MO_MCSymbol:
- O << *(MO.getMCSymbol());
- return;
- default:
- O << "<unknown operand type: " << MO.getType() << ">";
- return;
- }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- // Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default:
- // See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
- case 'L': // Write second word of DImode reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNo).isReg() ||
- OpNo+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNo+1).isReg())
- return true;
- ++OpNo; // Return the high-part.
- break;
- }
- }
-
- printOperand(MI, OpNo, O);
- return false;
-}
-
-bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
- printMemRegReg(MI, OpNo, O);
- return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeCellSPUAsmPrinter() {
- RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
-}
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
deleted file mode 100644
index 9bc6be7986..0000000000
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ /dev/null
@@ -1,53 +0,0 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the STI Cell SPU architecture.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-
-// Return-value convention for Cell SPU: return value to be passed in reg 3-74
-def RetCC_SPU : CallingConv<[
- CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
-]>;
-
-
-//===----------------------------------------------------------------------===//
-// CellSPU Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CCC_SPU : CallingConv<[
- CCIfType<[i8, i16, i32, i64, i128, f32, f64,
- v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToStack<16, 16>>
-]>;
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
deleted file mode 100644
index f01199515a..0000000000
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUFrameLowering.h"
-#include "SPU.h"
-#include "SPUInstrBuilder.h"
-#include "SPUInstrInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// SPUFrameLowering:
-//===----------------------------------------------------------------------===//
-
-SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
- Subtarget(sti) {
- LR[0].first = SPU::R0;
- LR[0].second = 16;
-}
-
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- return MFI->getStackSize() &&
- (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects());
-}
-
-
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
-
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = getStackAlignment();
- unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
- assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
- unsigned AlignMask = Align - 1;
-
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
-
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
-
- // Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
- // Update frame info.
- MFI->setStackSize(FrameSize);
-}
-
-void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const SPUInstrInfo &TII =
- *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineModuleInfo &MMI = MF.getMMI();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Prepare for debug frame info.
- bool hasDebugInfo = MMI.hasDebugInfo();
- MCSymbol *FrameLabel = 0;
-
- // Move MBBI back to the beginning of the function.
- MBBI = MBB.begin();
-
- // Work out frame sizes.
- determineFrameLayout(MF);
- int FrameSize = MFI->getStackSize();
-
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
- if (hasDebugInfo) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
- }
-
- // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
- // for the ABI
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
- .addReg(SPU::R1);
- if (isInt<10>(FrameSize)) {
- // Spill $sp to adjusted $sp
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
- .addReg(SPU::R1);
- // Adjust $sp by required amout
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (isInt<16>(FrameSize)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(-16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
- .addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
-
- if (hasDebugInfo) {
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
- // Show update of SP.
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == SPU::R0) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
- }
-
- // Mark effective beginning of when frame pointer is ready.
- MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
-
- MachineLocation FPDst(SPU::R1);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
- }
- }
-}
-
-void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const SPUInstrInfo &TII =
- *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int FrameSize = MFI->getStackSize();
- int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
- DebugLoc dl = MBBI->getDebugLoc();
-
- assert(MBBI->getOpcode() == SPU::RET &&
- "Can only insert epilog into returning blocks");
- assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = FrameSize + SPUFrameLowering::minStackSize();
- if (isInt<10>(FrameSize + LinkSlotOffset)) {
- // Reload $lr, adjust $sp by required amount
- // Note: We do this to slightly improve dual issue -- not by much, but it
- // is an opportunity for dual issue.
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(FrameSize + LinkSlotOffset)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
- .addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
- addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
- }
-}
-
-void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const{
- // Mark LR and SP unused, since the prolog spills them to stack and
- // we don't want anyone else to spill them for us.
- //
- // Also, unless R2 is really used someday, don't spill it automatically.
- MF.getRegInfo().setPhysRegUnused(SPU::R0);
- MF.getRegInfo().setPhysRegUnused(SPU::R1);
- MF.getRegInfo().setPhysRegUnused(SPU::R2);
-
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = &SPU::R32CRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
-}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
deleted file mode 100644
index 11c52818dd..0000000000
--- a/lib/Target/CellSPU/SPUFrameLowering.h
+++ /dev/null
@@ -1,80 +0,0 @@
-//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains CellSPU frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_FRAMEINFO_H
-#define SPU_FRAMEINFO_H
-
-#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SPUSubtarget;
-
- class SPUFrameLowering: public TargetFrameLowering {
- const SPUSubtarget &Subtarget;
- std::pair<unsigned, int> LR[1];
-
- public:
- SPUFrameLowering(const SPUSubtarget &sti);
-
- //! Determine the frame's layour
- void determineFrameLayout(MachineFunction &MF) const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- //! Prediate: Target has dedicated frame pointer
- bool hasFP(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- //! Return a function's saved spill slots
- /*!
- For CellSPU, a function's saved spill slots is just the link register.
- */
- const std::pair<unsigned, int> *
- getCalleeSaveSpillSlots(unsigned &NumEntries) const;
-
- //! Stack slot size (16 bytes)
- static int stackSlotSize() {
- return 16;
- }
- //! Maximum frame offset representable by a signed 10-bit integer
- /*!
- This is the maximum frame offset that can be expressed as a 10-bit
- integer, used in D-form addresses.
- */
- static int maxFrameOffset() {
- return ((1 << 9) - 1) * stackSlotSize();
- }
- //! Minimum frame offset representable by a signed 10-bit integer
- static int minFrameOffset() {
- return -(1 << 9) * stackSlotSize();
- }
- //! Minimum frame size (enough to spill LR + SP)
- static int minStackSize() {
- return (2 * stackSlotSize());
- }
- //! Convert frame index to stack offset
- static int FItoStackOffset(int frame_index) {
- return frame_index * stackSlotSize();
- }
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
deleted file mode 100644
index 67a83f16a6..0000000000
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements hazard recognizers for scheduling on Cell SPU
-// processors.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "sched"
-
-#include "SPUHazardRecognizers.h"
-#include "SPU.h"
-#include "SPUInstrInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Cell SPU hazard recognizer
-//
-// This is the pipeline hazard recognizer for the Cell SPU processor. It does
-// very little right now.
-//===----------------------------------------------------------------------===//
-
-/// Return the pipeline hazard type encountered or generated by this
-/// instruction. Currently returns NoHazard.
-///
-/// \return NoHazard
-ScheduleHazardRecognizer::HazardType
-SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
-{
- // Initial thoughts on how to do this, but this code cannot work unless the
- // function's prolog and epilog code are also being scheduled so that we can
- // accurately determine which pipeline is being scheduled.
-#if 0
- assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
-
- const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
- ScheduleHazardRecognizer::HazardType retval = NoHazard;
- bool mustBeOdd = false;
-
- switch (Node->getOpcode()) {
- case SPU::LQDv16i8:
- case SPU::LQDv8i16:
- case SPU::LQDv4i32:
- case SPU::LQDv4f32:
- case SPU::LQDv2f64:
- case SPU::LQDr128:
- case SPU::LQDr64:
- case SPU::LQDr32:
- case SPU::LQDr16:
- case SPU::LQAv16i8:
- case SPU::LQAv8i16:
- case SPU::LQAv4i32:
- case SPU::LQAv4f32:
- case SPU::LQAv2f64:
- case SPU::LQAr128:
- case SPU::LQAr64:
- case SPU::LQAr32:
- case SPU::LQXv4i32:
- case SPU::LQXr128:
- case SPU::LQXr64:
- case SPU::LQXr32:
- case SPU::LQXr16:
- case SPU::STQDv16i8:
- case SPU::STQDv8i16:
- case SPU::STQDv4i32:
- case SPU::STQDv4f32:
- case SPU::STQDv2f64:
- case SPU::STQDr128:
- case SPU::STQDr64:
- case SPU::STQDr32:
- case SPU::STQDr16:
- case SPU::STQDr8:
- case SPU::STQAv16i8:
- case SPU::STQAv8i16:
- case SPU::STQAv4i32:
- case SPU::STQAv4f32:
- case SPU::STQAv2f64:
- case SPU::STQAr128:
- case SPU::STQAr64:
- case SPU::STQAr32:
- case SPU::STQAr16:
- case SPU::STQAr8:
- case SPU::STQXv16i8:
- case SPU::STQXv8i16:
- case SPU::STQXv4i32:
- case SPU::STQXv4f32:
- case SPU::STQXv2f64:
- case SPU::STQXr128:
- case SPU::STQXr64:
- case SPU::STQXr32:
- case SPU::STQXr16:
- case SPU::STQXr8:
- case SPU::RET:
- mustBeOdd = true;
- break;
- default:
- // Assume that this instruction can be on the even pipe
- break;
- }
-
- if (mustBeOdd && !EvenOdd)
- retval = Hazard;
-
- DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
- << retval << "\n");
- EvenOdd ^= 1;
- return retval;
-#else
- return NoHazard;
-#endif
-}
-
-void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
-{
-}
-
-void SPUHazardRecognizer::AdvanceCycle()
-{
- DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
-}
-
-void SPUHazardRecognizer::EmitNoop()
-{
- AdvanceCycle();
-}
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
deleted file mode 100644
index 30acaeaa36..0000000000
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines hazard recognizers for scheduling on the Cell SPU
-// processor.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPUHAZRECS_H
-#define SPUHAZRECS_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-
-namespace llvm {
-
-class TargetInstrInfo;
-
-/// SPUHazardRecognizer
-class SPUHazardRecognizer : public ScheduleHazardRecognizer
-{
-public:
- SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {}
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void EmitNoop();
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
deleted file mode 100644
index 5d5061054b..0000000000
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ /dev/null
@@ -1,1192 +0,0 @@
-//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pattern matching instruction selector for the Cell SPU,
-// converting from a legalized dag to a SPU-target dag.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "SPUHazardRecognizers.h"
-#include "SPUFrameLowering.h"
-#include "SPUTargetMachine.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
- isI32IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
- bool
- isI32IntU10Immediate(ConstantSDNode *CN)
- {
- return isUInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
- bool
- isI16IntU10Immediate(ConstantSDNode *CN)
- {
- return isUInt<10>((short) CN->getZExtValue());
- }
-
- //! ConstantSDNode predicate for signed 16-bit values
- /*!
- \param CN The constant SelectionDAG node holding the value
- \param Imm The returned 16-bit value, if returning true
-
- This predicate tests the value in \a CN to see whether it can be
- represented as a 16-bit, sign-extended quantity. Returns true if
- this is the case.
- */
- bool
- isIntS16Immediate(ConstantSDNode *CN, short &Imm)
- {
- EVT vt = CN->getValueType(0);
- Imm = (short) CN->getZExtValue();
- if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
- return true;
- } else if (vt == MVT::i32) {
- int32_t i_val = (int32_t) CN->getZExtValue();
- return i_val == SignExtend32<16>(i_val);
- } else {
- int64_t i_val = (int64_t) CN->getZExtValue();
- return i_val == SignExtend64<16>(i_val);
- }
- }
-
- //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
- static bool
- isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
- {
- EVT vt = FPN->getValueType(0);
- if (vt == MVT::f32) {
- int val = FloatToBits(FPN->getValueAPF().convertToFloat());
- if (val == SignExtend32<16>(val)) {
- Imm = (short) val;
- return true;
- }
- }
-
- return false;
- }
-
- //! Generate the carry-generate shuffle mask.
- SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
- }
-
- //! Generate the borrow-generate shuffle mask
- SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
- }
-
- //===------------------------------------------------------------------===//
- /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
- /// instructions for SelectionDAG operations.
- ///
- class SPUDAGToDAGISel :
- public SelectionDAGISel
- {
- const SPUTargetMachine &TM;
- const SPUTargetLowering &SPUtli;
- unsigned GlobalBaseReg;
-
- public:
- explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm),
- SPUtli(*tm.getTargetLowering())
- { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- // Make sure we re-emit a set of the global base reg if necessary
- GlobalBaseReg = 0;
- SelectionDAGISel::runOnMachineFunction(MF);
- return true;
- }
-
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
- inline SDValue getI32Imm(uint32_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
- }
-
- /// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
-
- SDNode *emitBuildVector(SDNode *bvNode) {
- EVT vecVT = bvNode->getValueType(0);
- DebugLoc dl = bvNode->getDebugLoc();
-
- // Check to see if this vector can be represented as a CellSPU immediate
- // constant by invoking all of the instruction selection predicates:
- if (((vecVT == MVT::v8i16) &&
- (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
- ((vecVT == MVT::v4i32) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
- ((vecVT == MVT::v2i64) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
- HandleSDNode Dummy(SDValue(bvNode, 0));
- if (SDNode *N = Select(bvNode))
- return N;
- return Dummy.getValue().getNode();
- }
-
- // No, need to emit a constant pool spill:
- std::vector<Constant*> CV;
-
- for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
- ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
- }
-
- const Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue CGPoolOffset =
- SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
-
- HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
- CurDAG->getEntryNode(), CGPoolOffset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment));
- CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- }
-
- /// Select - Convert the specified operand from a target-independent to a
- /// target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
-
- //! Emit the instruction sequence for i64 shl
- SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
-
- //! Emit the instruction sequence for i64 srl
- SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
-
- //! Emit the instruction sequence for i64 sra
- SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
-
- //! Emit the necessary sequence for loading i64 constants:
- SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
-
- //! Alternate instruction emit sequence for loading i64 constants
- SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
-
- //! Returns true if the address N is an A-form (local store) address
- bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- //! D-form address predicate
- bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// Alternate D-form address using i7 offset predicate
- bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base);
-
- /// D-form address selection workhorse
- bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base, int minOffset, int maxOffset);
-
- //! Address predicate if N can be expressed as an indexed [r+r] operation.
- bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
- && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
- SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
- break;
- case 'o': // offsetable
- if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
- && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
- Op0 = Op;
- Op1 = getSmallIPtrImm(0);
- }
- break;
- case 'v': // not offsetable
-#if 1
- llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
-#else
- SelectAddrIdxOnly(Op, Op, Op0, Op1);
- break;
-#endif
- }
-
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
- return false;
- }
-
- virtual const char *getPassName() const {
- return "Cell SPU DAG->DAG Pattern Instruction Selection";
- }
-
- private:
- SDValue getRC( MVT );
-
- // Include the pieces autogenerated from the target description.
-#include "SPUGenDAGISel.inc"
- };
-}
-
-/*!
- \param Op The ISD instruction operand
- \param N The address to be tested
- \param Base The base address
- \param Index The base address index
- */
-bool
-SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- // These match the addr256k operand type:
- EVT OffsVT = MVT::i16;
- SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
- int64_t val;
-
- switch (N.getOpcode()) {
- case ISD::Constant:
- val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
- Base = CurDAG->getTargetConstant( val , MVT::i32);
- Index = Zero;
- return true;
- case ISD::ConstantPool:
- case ISD::GlobalAddress:
- report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
- /*NOTREACHED*/
-
- case ISD::TargetConstant:
- case ISD::TargetGlobalAddress:
- case ISD::TargetJumpTable:
- report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
- "not wrapped as A-form address.");
- /*NOTREACHED*/
-
- case SPUISD::AFormAddr:
- // Just load from memory if there's only a single use of the location,
- // otherwise, this will get handled below with D-form offset addresses
- if (N.hasOneUse()) {
- SDValue Op0 = N.getOperand(0);
- switch (Op0.getOpcode()) {
- case ISD::TargetConstantPool:
- case ISD::TargetJumpTable:
- Base = Op0;
- Index = Zero;
- return true;
-
- case ISD::TargetGlobalAddress: {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
- const GlobalValue *GV = GSDN->getGlobal();
- if (GV->getAlignment() == 16) {
- Base = Op0;
- Index = Zero;
- return true;
- }
- break;
- }
- }
- }
- break;
- }
- return false;
-}
-
-bool
-SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base) {
- const int minDForm2Offset = -(1 << 7);
- const int maxDForm2Offset = (1 << 7) - 1;
- return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
- maxDForm2Offset);
-}
-
-/*!
- \param Op The ISD instruction (ignored)
- \param N The address to be tested
- \param Base Base address register/pointer
- \param Index Base address index
-
- Examine the input address by a base register plus a signed 10-bit
- displacement, [r+I10] (D-form address).
-
- \return true if \a N is a D-form address with \a Base and \a Index set
- to non-empty SDValue instances.
-*/
-bool
-SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- return DFormAddressPredicate(Op, N, Base, Index,
- SPUFrameLowering::minFrameOffset(),
- SPUFrameLowering::maxFrameOffset());
-}
-
-bool
-SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index, int minOffset,
- int maxOffset) {
- unsigned Opc = N.getOpcode();
- EVT PtrTy = SPUtli.getPointerTy();
-
- if (Opc == ISD::FrameIndex) {
- // Stack frame index must be less than 512 (divided by 16):
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
- << FI << "\n");
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (Opc == ISD::ADD) {
- // Generated by getelementptr
- const SDValue Op0 = N.getOperand(0);
- const SDValue Op1 = N.getOperand(1);
-
- if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
- || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = N;
- return true;
- } else if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- int32_t offset = int32_t(CN->getSExtValue());
-
- if (Op0.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI << "\n");
-
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (offset > minOffset && offset < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = Op0;
- return true;
- }
- } else if (Op0.getOpcode() == ISD::Constant
- || Op0.getOpcode() == ISD::TargetConstant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
- int32_t offset = int32_t(CN->getSExtValue());
-
- if (Op1.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
- int FI = int(FIN->getIndex());
- DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
- << " frame index = " << FI << "\n");
-
- if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return true;
- }
- } else if (offset > minOffset && offset < maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = Op1;
- return true;
- }
- }
- } else if (Opc == SPUISD::IndirectAddr) {
- // Indirect with constant offset -> D-Form address
- const SDValue Op0 = N.getOperand(0);
- const SDValue Op1 = N.getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::Hi
- && Op1.getOpcode() == SPUISD::Lo) {
- // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
- Base = CurDAG->getTargetConstant(0, PtrTy);
- Index = N;
- return true;
- } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
- int32_t offset = 0;
- SDValue idxOp;
-
- if (isa<ConstantSDNode>(Op1)) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- offset = int32_t(CN->getSExtValue());
- idxOp = Op0;
- } else if (isa<ConstantSDNode>(Op0)) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
- offset = int32_t(CN->getSExtValue());
- idxOp = Op1;
- }
-
- if (offset >= minOffset && offset <= maxOffset) {
- Base = CurDAG->getTargetConstant(offset, PtrTy);
- Index = idxOp;
- return true;
- }
- }
- } else if (Opc == SPUISD::AFormAddr) {
- Base = CurDAG->getTargetConstant(0, N.getValueType());
- Index = N;
- return true;
- } else if (Opc == SPUISD::LDRESULT) {
- Base = CurDAG->getTargetConstant(0, N.getValueType());
- Index = N;
- return true;
- } else if (Opc == ISD::Register
- ||Opc == ISD::CopyFromReg
- ||Opc == ISD::UNDEF
- ||Opc == ISD::Constant) {
- unsigned OpOpc = Op->getOpcode();
-
- if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
- // Direct load/store without getelementptr
- SDValue Offs;
-
- Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
-
- if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
- if (Offs.getOpcode() == ISD::UNDEF)
- Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
-
- Base = Offs;
- Index = N;
- return true;
- }
- } else {
- /* If otherwise unadorned, default to D-form address with 0 offset: */
- if (Opc == ISD::CopyFromReg) {
- Index = N.getOperand(1);
- } else {
- Index = N;
- }
-
- Base = CurDAG->getTargetConstant(0, Index.getValueType());
- return true;
- }
- }
-
- return false;
-}
-
-/*!
- \param Op The ISD instruction operand
- \param N The address operand
- \param Base The base pointer operand
- \param Index The offset/index operand
-
- If the address \a N can be expressed as an A-form or D-form address, returns
- false. Otherwise, creates two operands, Base and Index that will become the
- (r)(r) X-form address.
-*/
-bool
-SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
- if (!SelectAFormAddr(Op, N, Base, Index)
- && !SelectDFormAddr(Op, N, Base, Index)) {
- // If the address is neither A-form or D-form, punt and use an X-form
- // address:
- Base = N.getOperand(1);
- Index = N.getOperand(0);
- return true;
- }
-
- return false;
-}
-
-/*!
- Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue
- to be used as the last parameter of a
-CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call
- \param VT the value type for which we want a register class
-*/
-SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
- switch( VT.SimpleTy ) {
- case MVT::i8:
- return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
- case MVT::i16:
- return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
- case MVT::i32:
- return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
- case MVT::f32:
- return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
- case MVT::i64:
- return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
- case MVT::i128:
- return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v2f64:
- return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
- default:
- assert( false && "add a new case here" );
- return SDValue();
- }
-}
-
-//! Convert the operand from a target-independent to a target-specific node
-/*!
- */
-SDNode *
-SPUDAGToDAGISel::Select(SDNode *N) {
- unsigned Opc = N->getOpcode();
- int n_ops = -1;
- unsigned NewOpc = 0;
- EVT OpVT = N->getValueType(0);
- SDValue Ops[8];
- DebugLoc dl = N->getDebugLoc();
-
- if (N->isMachineOpcode())
- return NULL; // Already selected.
-
- if (Opc == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
- SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
-
- if (FI < 128) {
- NewOpc = SPU::AIr32;
- Ops[0] = TFI;
- Ops[1] = Imm0;
- n_ops = 2;
- } else {
- NewOpc = SPU::Ar32;
- Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
- N->getValueType(0), TFI),
- 0);
- n_ops = 2;
- }
- } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
- // Catch the i64 constants that end up here. Note: The backend doesn't
- // attempt to legalize the constant (it's useless because DAGCombiner
- // will insert 64-bit constants and we can't stop it).
- return SelectI64Constant(N, OpVT, N->getDebugLoc());
- } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
- && OpVT == MVT::i64) {
- SDValue Op0 = N->getOperand(0);
- EVT Op0VT = Op0.getValueType();
- EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- Op0VT, (128 / Op0VT.getSizeInBits()));
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue shufMask;
-
- switch (Op0VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
- /*NOTREACHED*/
- case MVT::i32:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x00010203, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x08090a0b, MVT::i32));
- break;
-
- case MVT::i16:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80800203, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80800a0b, MVT::i32));
- break;
-
- case MVT::i8:
- shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x80808003, MVT::i32),
- CurDAG->getConstant(0x80808080, MVT::i32),
- CurDAG->getConstant(0x8080800b, MVT::i32));
- break;
- }
-
- SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
-
- HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
- Op0VecVT, Op0));
-
- SDValue PromScalar;
- if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
- PromScalar = SDValue(N, 0);
- else
- PromScalar = PromoteScalar.getValue();
-
- SDValue zextShuffle =
- CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- PromScalar, PromScalar,
- SDValue(shufMaskLoad, 0));
-
- HandleSDNode Dummy2(zextShuffle);
- if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
- zextShuffle = SDValue(N, 0);
- else
- zextShuffle = Dummy2.getValue();
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
- zextShuffle));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- SelectCode(Dummy.getValue().getNode());
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
-
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
- SDNode *CGLoad =
- emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
-
- HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)));
- CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
- if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
- return N;
- return Dummy.getValue().getNode();
- } else if (Opc == ISD::TRUNCATE) {
- SDValue Op0 = N->getOperand(0);
- if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
- && OpVT == MVT::i32
- && Op0.getValueType() == MVT::i64) {
- // Catch (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32
- //
- // Take advantage of the fact that the upper 32 bits are in the
- // i32 preferred slot and avoid shuffle gymnastics:
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
- if (CN != 0) {
- unsigned shift_amt = unsigned(CN->getZExtValue());
-
- if (shift_amt >= 32) {
- SDNode *hi32 =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- Op0.getOperand(0), getRC(MVT::i32));
-
- shift_amt -= 32;
- if (shift_amt > 0) {
- // Take care of the additional shift, if present:
- SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
- unsigned Opc = SPU::ROTMAIr32_i32;
-
- if (Op0.getOpcode() == ISD::SRL)
- Opc = SPU::ROTMr32;
-
- hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
- shift);
- }
-
- return hi32;
- }
- }
- }
- } else if (Opc == ISD::SHL) {
- if (OpVT == MVT::i64)
- return SelectSHLi64(N, OpVT);
- } else if (Opc == ISD::SRL) {
- if (OpVT == MVT::i64)
- return SelectSRLi64(N, OpVT);
- } else if (Opc == ISD::SRA) {
- if (OpVT == MVT::i64)
- return SelectSRAi64(N, OpVT);
- } else if (Opc == ISD::FNEG
- && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
- DebugLoc dl = N->getDebugLoc();
- // Check if the pattern is a special form of DFNMS:
- // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
- SDValue Op0 = N->getOperand(0);
- if (Op0.getOpcode() == ISD::FSUB) {
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == ISD::FMUL) {
- unsigned Opc = SPU::DFNMSf64;
- if (OpVT == MVT::v2f64)
- Opc = SPU::DFNMSv2f64;
-
- return CurDAG->getMachineNode(Opc, dl, OpVT,
- Op00.getOperand(0),
- Op00.getOperand(1),
- Op0.getOperand(1));
- }
- }
-
- SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
- SDNode *signMask = 0;
- unsigned Opc = SPU::XORfneg64;
-
- if (OpVT == MVT::f64) {
- signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
- } else if (OpVT == MVT::v2f64) {
- Opc = SPU::XORfnegvec;
- signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
- MVT::v2i64,
- negConst, negConst).getNode());
- }
-
- return CurDAG->getMachineNode(Opc, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- } else if (Opc == ISD::FABS) {
- if (OpVT == MVT::f64) {
- SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
- return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- } else if (OpVT == MVT::v2f64) {
- SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
- SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
- absConst, absConst);
- SDNode *signMask = emitBuildVector(absVec.getNode());
- return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
- N->getOperand(0), SDValue(signMask, 0));
- }
- } else if (Opc == SPUISD::LDRESULT) {
- // Custom select instructions for LDRESULT
- EVT VT = N->getValueType(0);
- SDValue Arg = N->getOperand(0);
- SDValue Chain = N->getOperand(1);
- SDNode *Result;
-
- Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
- MVT::Other, Arg,
- getRC( VT.getSimpleVT()), Chain);
- return Result;
-
- } else if (Opc == SPUISD::IndirectAddr) {
- // Look at the operands: SelectCode() will catch the cases that aren't
- // specifically handled here.
- //
- // SPUInstrInfo catches the following patterns:
- // (SPUindirect (SPUhi ...), (SPUlo ...))
- // (SPUindirect $sp, imm)
- EVT VT = N->getValueType(0);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- RegisterSDNode *RN;
-
- if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
- || (Op0.getOpcode() == ISD::Register
- && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
- && RN->getReg() != SPU::R1))) {
- NewOpc = SPU::Ar32;
- Ops[1] = Op1;
- if (Op1.getOpcode() == ISD::Constant) {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
- if (isInt<10>(CN->getSExtValue())) {
- NewOpc = SPU::AIr32;
- Ops[1] = Op1;
- } else {
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
- N->getValueType(0),
- Op1),
- 0);
- }
- }
- Ops[0] = Op0;
- n_ops = 2;
- }
- }
-
- if (n_ops > 0) {
- if (N->hasOneUse())
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
- else
- return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
- } else
- return SelectCode(N);
-}
-
-/*!
- * Emit the instruction sequence for i64 left shifts. The basic algorithm
- * is to fill the bottom two word slots with zeros so that zeros are shifted
- * in as the entire quadword is shifted left.
- *
- * \note This code could also be used to implement v2i64 shl.
- *
- * @param Op The shl operand
- * @param OpVT Op's machine value value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
- SDValue SelMaskVal;
- DebugLoc dl = N->getDebugLoc();
-
- VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
- Op0, getRC(MVT::v2i64) );
- SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
- SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
- ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0, OpVT));
- VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(ZeroFill, 0),
- SDValue(VecOp0, 0),
- SDValue(SelMask, 0));
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *Bytes =
- CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
- SDNode *Bits =
- CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
- Shift =
- CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- * Emit the instruction sequence for i64 logical right shifts.
- *
- * @param Op The shl operand
- * @param OpVT Op's machine value value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
- SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- SDNode *VecOp0, *Shift = 0;
- DebugLoc dl = N->getDebugLoc();
-
- VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
- Op0, getRC(MVT::v2i64) );
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
- SDValue(VecOp0, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : VecOp0), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *Bytes =
- CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(3, ShiftAmtVT));
- SDNode *Bits =
- CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
- ShiftAmt,
- CurDAG->getTargetConstant(7, ShiftAmtVT));
-
- // Ensure that the shift amounts are negated!
- Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bytes, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- SDValue(Bits, 0),
- CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
- SDValue(VecOp0, 0), SDValue(Bytes, 0));
- Shift =
- CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(Bits, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- * Emit the instruction sequence for i64 arithmetic right shifts.
- *
- * @param Op The shl operand
- * @param OpVT Op's machine value value type (doesn't need to be passed, but
- * makes life easier.)
- * @return The SDNode with the entire instruction sequence
- */
-SDNode *
-SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
- // Promote Op0 to vector
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
- OpVT, (128 / OpVT.getSizeInBits()));
- SDValue ShiftAmt = N->getOperand(1);
- EVT ShiftAmtVT = ShiftAmt.getValueType();
- DebugLoc dl = N->getDebugLoc();
-
- SDNode *VecOp0 =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- VecVT, N->getOperand(0), getRC(MVT::v2i64));
-
- SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
- SDNode *SignRot =
- CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
- SDValue(VecOp0, 0), SignRotAmt);
- SDNode *UpperHalfSign =
- CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
-
- SDNode *UpperHalfSignMask =
- CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
- SDNode *UpperLowerMask =
- CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
- CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
- SDNode *UpperLowerSelect =
- CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
- SDValue(UpperHalfSignMask, 0),
- SDValue(VecOp0, 0),
- SDValue(UpperLowerMask, 0));
-
- SDNode *Shift = 0;
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
- unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
- unsigned bits = unsigned(CN->getZExtValue()) & 7;
-
- if (bytes > 0) {
- bytes = 31 - bytes;
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
- SDValue(UpperLowerSelect, 0),
- CurDAG->getTargetConstant(bytes, ShiftAmtVT));
- }
-
- if (bits > 0) {
- bits = 8 - bits;
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
- SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
- CurDAG->getTargetConstant(bits, ShiftAmtVT));
- }
- } else {
- SDNode *NegShift =
- CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
- ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
-
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
- SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
- Shift =
- CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
- SDValue(Shift, 0), SDValue(NegShift, 0));
- }
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(Shift, 0), getRC(MVT::i64));
-}
-
-/*!
- Do the necessary magic necessary to load a i64 constant
- */
-SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
- DebugLoc dl) {
- ConstantSDNode *CN = cast<ConstantSDNode>(N);
- return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
-}
-
-SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
- DebugLoc dl) {
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
- SDValue i64vec =
- SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
-
- // Here's where it gets interesting, because we have to parse out the
- // subtree handed back in i64vec:
-
- if (i64vec.getOpcode() == ISD::BITCAST) {
- // The degenerate case where the upper and lower bits in the splat are
- // identical:
- SDValue Op0 = i64vec.getOperand(0);
-
- ReplaceUses(i64vec, Op0);
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- SDValue(emitBuildVector(Op0.getNode()), 0),
- getRC(MVT::i64));
- } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
- SDValue lhs = i64vec.getOperand(0);
- SDValue rhs = i64vec.getOperand(1);
- SDValue shufmask = i64vec.getOperand(2);
-
- if (lhs.getOpcode() == ISD::BITCAST) {
- ReplaceUses(lhs, lhs.getOperand(0));
- lhs = lhs.getOperand(0);
- }
-
- SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
- ? lhs.getNode()
- : emitBuildVector(lhs.getNode()));
-
- if (rhs.getOpcode() == ISD::BITCAST) {
- ReplaceUses(rhs, rhs.getOperand(0));
- rhs = rhs.getOperand(0);
- }
-
- SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
- ? rhs.getNode()
- : emitBuildVector(rhs.getNode()));
-
- if (shufmask.getOpcode() == ISD::BITCAST) {
- ReplaceUses(shufmask, shufmask.getOperand(0));
- shufmask = shufmask.getOperand(0);
- }
-
- SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
- ? shufmask.getNode()
- : emitBuildVector(shufmask.getNode()));
-
- SDValue shufNode =
- CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- SDValue(lhsNode, 0), SDValue(rhsNode, 0),
- SDValue(shufMaskNode, 0));
- HandleSDNode Dummy(shufNode);
- SDNode *SN = SelectCode(Dummy.getValue().getNode());
- if (SN == 0) SN = Dummy.getValue().getNode();
-
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
- OpVT, SDValue(SN, 0), getRC(MVT::i64));
- } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
- return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
- SDValue(emitBuildVector(i64vec.getNode()), 0),
- getRC(MVT::i64));
- } else {
- report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
- "condition");
- }
-}
-
-/// createSPUISelDag - This pass converts a legalized DAG into a
-/// SPU-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
- return new SPUDAGToDAGISel(TM);
-}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
deleted file mode 100644
index 4e9fcd1bc7..0000000000
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ /dev/null
@@ -1,3266 +0,0 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUISelLowering.h"
-#include "SPUTargetMachine.h"
-#include "SPUFrameLowering.h"
-#include "SPUMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- // Byte offset of the preferred slot (counted from the MSB)
- int prefslotOffset(EVT VT) {
- int retval=0;
- if (VT==MVT::i1) retval=3;
- if (VT==MVT::i8) retval=3;
- if (VT==MVT::i16) retval=2;
-
- return retval;
- }
-
- //! Expand a library call into an actual call DAG node
- /*!
- \note
- This code is taken from SelectionDAGLegalize, since it is not exposed as
- part of the LLVM SelectionDAG API.
- */
-
- SDValue
- ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op.getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op.getOperand(i);
- Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
-
- // Splice the libcall in wherever FindInputOutputChains tells us to.
- Type *RetTy =
- Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
- TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned,
- false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- Callee, Args, DAG, Op.getDebugLoc());
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
- }
-}
-
-SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()),
- SPUTM(TM) {
-
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
- // Set RTLIB libcall names as used by SPU:
- setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
-
- // Set up the SPU's register classes:
- addRegisterClass(MVT::i8, &SPU::R8CRegClass);
- addRegisterClass(MVT::i16, &SPU::R16CRegClass);
- addRegisterClass(MVT::i32, &SPU::R32CRegClass);
- addRegisterClass(MVT::i64, &SPU::R64CRegClass);
- addRegisterClass(MVT::f32, &SPU::R32FPRegClass);
- addRegisterClass(MVT::f64, &SPU::R64FPRegClass);
- addRegisterClass(MVT::i128, &SPU::GPRCRegClass);
-
- // SPU has no sign or zero extended loads for i1, i8, i16:
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
- setTruncStoreAction(MVT::i128, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i16, Expand);
- setTruncStoreAction(MVT::i128, MVT::i8, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // SPU constant load actions are custom lowered:
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-
- // SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- // Expand the jumptable branches
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // Custom lower SELECT_CC for most cases, but expand by default
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- // SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-
- // SPU has no division/remainder instructions
- setOperationAction(ISD::SREM, MVT::i8, Expand);
- setOperationAction(ISD::UREM, MVT::i8, Expand);
- setOperationAction(ISD::SDIV, MVT::i8, Expand);
- setOperationAction(ISD::UDIV, MVT::i8, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i128, Expand);
- setOperationAction(ISD::UREM, MVT::i128, Expand);
- setOperationAction(ISD::SDIV, MVT::i128, Expand);
- setOperationAction(ISD::UDIV, MVT::i128, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
-
- // We don't support sin/cos/sqrt/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
-
- // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
- // for f32!)
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
- // SPU can do rotate right and left, so legalize it... but customize for i8
- // because instructions don't exist.
-
- // FIXME: Change from "expand" to appropriate type once ROTR is supported in
- // .td files.
- setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
-
- setOperationAction(ISD::ROTL, MVT::i32, Legal);
- setOperationAction(ISD::ROTL, MVT::i16, Legal);
- setOperationAction(ISD::ROTL, MVT::i8, Custom);
-
- // SPU has no native version of shift left/right for i8
- setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i8, Custom);
- setOperationAction(ISD::SRA, MVT::i8, Custom);
-
- // Make these operations legal and handle them during instruction selection:
- setOperationAction(ISD::SHL, MVT::i64, Legal);
- setOperationAction(ISD::SRL, MVT::i64, Legal);
- setOperationAction(ISD::SRA, MVT::i64, Legal);
-
- // Custom lower i8, i32 and i64 multiplications
- setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
-
- // Expand double-width multiplication
- // FIXME: It would probably be reasonable to support some of these operations
- setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
-
- // Need to custom handle (some) common i8, i64 math ops
- setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Legal);
- setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Legal);
-
- // SPU does not have BSWAP. It does have i32 support CTLZ.
- // CTPOP has to be custom lowered.
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Custom);
- setOperationAction(ISD::CTPOP, MVT::i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i128, Expand);
-
- setOperationAction(ISD::CTTZ , MVT::i8, Expand);
- setOperationAction(ISD::CTTZ , MVT::i16, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i128, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
-
- setOperationAction(ISD::CTLZ , MVT::i8, Promote);
- setOperationAction(ISD::CTLZ , MVT::i16, Promote);
- setOperationAction(ISD::CTLZ , MVT::i32, Legal);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i128, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
-
- // SPU has a version of select that implements (a&~c)|(b&c), just like
- // select ought to work:
- setOperationAction(ISD::SELECT, MVT::i8, Legal);
- setOperationAction(ISD::SELECT, MVT::i16, Legal);
- setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Legal);
-
- setOperationAction(ISD::SETCC, MVT::i8, Legal);
- setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
-
- // Custom lower i128 -> i64 truncates
- setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
-
- // Custom lower i32/i64 -> i128 sign extend
- setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
- // to expand to a libcall, hence the custom lowering:
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
-
- // FDIV on SPU requires custom lowering
- setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
-
- // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
- setOperationAction(ISD::BITCAST, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::f32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i64, Legal);
- setOperationAction(ISD::BITCAST, MVT::f64, Legal);
-
- // We cannot sextinreg(i1). Expand to shifts.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool nodes into the
- // appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
- }
-
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
-
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
-
- // Cell SPU has instructions for converting between i64 and fp.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
-
- // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // First set operation action for all vector types to expand. Then we
- // will selectively turn on ones that can be effectively codegen'd.
- addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass);
-
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
- // Set operation actions to legal types only.
- if (!isTypeLegal(VT)) continue;
-
- // add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD, VT, Legal);
- setOperationAction(ISD::SUB, VT, Legal);
- // mul has to be custom lowered.
- setOperationAction(ISD::MUL, VT, Legal);
-
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Custom);
-
- // These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
-
- // Expand all trunc stores
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
- setTruncStoreAction(VT, TargetVT, Expand);
- }
-
- // Custom lower build_vector, constant pool spills, insert and
- // extract vector elements:
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- }
-
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
-
- setOperationAction(ISD::AND, MVT::v16i8, Custom);
- setOperationAction(ISD::OR, MVT::v16i8, Custom);
- setOperationAction(ISD::XOR, MVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
-
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
-
- setStackPointerRegisterToSaveRestore(SPU::R1);
-
- // We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
-
- setMinFunctionAlignment(3);
-
- computeRegisterProperties();
-
- // Set pre-RA register scheduler default to BURR, which produces slightly
- // better code than the default (could also be TDRR, but TargetLowering.h
- // needs a mod to support that model):
- setSchedulingPreference(Sched::RegPressure);
-}
-
-const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
- case SPUISD::Hi: return "SPUISD::Hi";
- case SPUISD::Lo: return "SPUISD::Lo";
- case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
- case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
- case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
- case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
- case SPUISD::CALL: return "SPUISD::CALL";
- case SPUISD::SHUFB: return "SPUISD::SHUFB";
- case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
- case SPUISD::CNTB: return "SPUISD::CNTB";
- case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
- case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
- case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
- case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
- case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
- case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
- case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
- case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
- case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
- case SPUISD::SELB: return "SPUISD::SELB";
- case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
- case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
- case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Return the Cell SPU's SETCC result type
-//===----------------------------------------------------------------------===//
-
-EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i8, i16 and i32 are valid SETCC result types
- MVT::SimpleValueType retval;
-
- switch(VT.getSimpleVT().SimpleTy){
- case MVT::i1:
- case MVT::i8:
- retval = MVT::i8; break;
- case MVT::i16:
- retval = MVT::i16; break;
- case MVT::i32:
- default:
- retval = MVT::i32;
- }
- return retval;
-}
-
-//===----------------------------------------------------------------------===//
-// Calling convention code:
-//===----------------------------------------------------------------------===//
-
-#include "SPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// LowerOperation implementation
-//===----------------------------------------------------------------------===//
-
-/// Custom lower loads for CellSPU
-/*!
- All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to rotate to extract the requested element.
-
- For extending loads, we also want to ensure that the following sequence is
- emitted, e.g. for MVT::f32 extending load to MVT::f64:
-
-\verbatim
-%1 v16i8,ch = load
-%2 v16i8,ch = rotate %1
-%3 v4f8, ch = bitconvert %2
-%4 f32 = vec2perfslot %3
-%5 f64 = fp_extend %4
-\endverbatim
-*/
-static SDValue
-LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDValue the_chain = LN->getChain();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- EVT InVT = LN->getMemoryVT();
- EVT OutVT = Op.getValueType();
- ISD::LoadExtType ExtType = LN->getExtensionType();
- unsigned alignment = LN->getAlignment();
- int pso = prefslotOffset(InVT);
- DebugLoc dl = Op.getDebugLoc();
- EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
- (128 / InVT.getSizeInBits()));
-
- // two sanity checks
- assert( LN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED adresses");
- // clean aligned loads can be selected as-is
- if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = LN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
-
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - pso);
-
- if (rotamt < 0)
- rotamt += 16;
-
- rotate = DAG.getConstant(rotamt, MVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(-pso, PtrVT));
- }
-
- // Do the load as a i128 to allow possible shifting
- SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false, 16);
-
- // When the size is not greater than alignment we get all data with just
- // one load
- if (alignment >= InVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- // Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
- low.getValue(0), rotate);
-
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- InVT, (128 / InVT.getSizeInBits()));
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BITCAST, dl, vecVT, result));
- }
- // When alignment is less than the size, we might need (known only at
- // run-time) two loads
- // TODO: if the memory address is composed only from constants, we have
- // extra kowledge, and might avoid the second load
- else {
- // storage position offset from lower 16 byte aligned memory chunk
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
- // get a registerfull of ones. (this implementation is a workaround: LLVM
- // cannot handle 128 bit signed int constants)
- SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(16, PtrVT)),
- highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false,
- 16);
-
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- high.getValue(1));
-
- // Shift the (possible) high part right to compensate the misalignemnt.
- // if there is no highpart (i.e. value is i64 and offset is 4), this
- // will zero out the high value.
- high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
- DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset
- ));
-
- // Shift the low similarly
- // TODO: add SPUISD::SHL_BYTES
- low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
-
- // Merge the two parts
- result = DAG.getNode(ISD::BITCAST, dl, vecVT,
- DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
-
- if (!InVT.isVector()) {
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
- }
-
- }
- // Handle extending loads by extending the scalar result:
- if (ExtType == ISD::SEXTLOAD) {
- result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::ZEXTLOAD) {
- result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::EXTLOAD) {
- unsigned NewOpc = ISD::ANY_EXTEND;
-
- if (OutVT.isFloatingPoint())
- NewOpc = ISD::FP_EXTEND;
-
- result = DAG.getNode(NewOpc, dl, OutVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
- SDValue retops[2] = {
- result,
- the_chain
- };
-
- result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
- retops, sizeof(retops) / sizeof(retops[0]));
- return result;
-}
-
-/// Custom lower stores for CellSPU
-/*!
- All CellSPU stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to generate a shuffle to insert the
- requested element into its place, then store the resulting block.
- */
-static SDValue
-LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- StoreSDNode *SN = cast<StoreSDNode>(Op);
- SDValue Value = SN->getValue();
- EVT VT = Value.getValueType();
- EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
- unsigned alignment = SN->getAlignment();
- SDValue result;
- EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
- (128 / StVT.getSizeInBits()));
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = SN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
-
-
- // two sanity checks
- assert( SN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED adresses");
- // clean aligned loads can be selected as-is
- if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Load the lower part of the memory to which to store.
- SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
-
- // if we don't need to store over the 16 byte boundary, one store suffices
- if (alignment >= StVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- LoadSDNode *LN = cast<LoadSDNode>(low);
- SDValue theValue = SN->getValue();
-
- if (StVT != VT
- && (theValue.getOpcode() == ISD::AssertZext
- || theValue.getOpcode() == ISD::AssertSext)) {
- // Drill down and get the value for zero- and sign-extended
- // quantities
- theValue = theValue.getOperand(0);
- }
-
- // If the base pointer is already a D-form address, then just create
- // a new D-form address with a slot offset and the orignal base pointer.
- // Otherwise generate a D-form address with the slot offset relative
- // to the stack pointer, which is always aligned.
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "CellSPU LowerSTORE: basePtr = ";
- basePtr.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
- insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
- theValue);
-
- result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, low,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, insertEltOp));
-
- result = DAG.getStore(the_chain, dl, result, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(),
- 16);
-
- }
- // do the store when it might cross the 16 byte memory access boundary.
- else {
- // TODO issue a warning if SN->isVolatile()== true? This is likely not
- // what the user wanted.
-
- // address offset from nearest lower 16byte alinged address
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- SN->getBasePtr(),
- DAG.getConstant(0xf, MVT::i32));
- // 16 - offset
- SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset);
- // 16 - sizeof(Value)
- SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- DAG.getConstant( VT.getSizeInBits()/8,
- MVT::i32));
- // get a registerfull of ones
- SDValue ones = DAG.getConstant(-1, MVT::v4i32);
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- // Create the 128 bit masks that have ones where the data to store is
- // located.
- SDValue lowmask, himask;
- // if the value to store don't fill up the an entire 128 bits, zero
- // out the last bits of the mask so that only the value we want to store
- // is masked.
- // this is e.g. in the case of store i32, align 2
- if (!VT.isVector()){
- Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
- lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- surplus);
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
-
- }
- else {
- lowmask = ones;
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- }
- // this will zero, if there are no data that goes to the high quad
- himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- offset_compl);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
- offset);
-
- // Load in the old data and zero out the parts that will be overwritten with
- // the new data to store.
- SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- hi.getValue(1));
-
- low = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
- hi = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
-
- // Shift the Value to store into place. rlow contains the parts that go to
- // the lower memory chunk, rhi has the parts that go to the upper one.
- SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
- rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
- SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
- offset_compl);
-
- // Merge the old data and the new data and store the results
- // Need to convert vectors here to integer as 'OR'ing floats assert
- rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
- rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
-
- low = DAG.getStore(the_chain, dl, rlow, basePtr,
- lowMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- hi = DAG.getStore(the_chain, dl, rhi,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
- hi.getValue(0));
- }
-
- return result;
-}
-
-//! Generate the address of a constant pool entry.
-/*!
-  \param Op  The ConstantPool node being lowered; its value type is used as
-             the pointer type of every node created here.
-  \param DAG The selection DAG in which the replacement nodes are built.
-  \param ST  Subtarget, consulted only for usingLargeMem().
-  \return    An AFormAddr node when the subtarget is not using large memory,
-             otherwise an IndirectAddr node combining the Hi and Lo parts.
-
-  Only the static relocation model is handled; any other model hits
-  llvm_unreachable.
- */
-static SDValue
-LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  const EVT AddrVT = Op.getValueType();
-  ConstantPoolSDNode *PoolNode = cast<ConstantPoolSDNode>(Op);
-  SDValue PoolAddr = DAG.getTargetConstantPool(PoolNode->getConstVal(), AddrVT,
-                                               PoolNode->getAlignment());
-  SDValue ZeroOffs = DAG.getConstant(0, AddrVT);
-  // FIXME there is no actual debug info here
-  DebugLoc Loc = Op.getDebugLoc();
-
-  if (DAG.getTarget().getRelocationModel() != Reloc::Static)
-    llvm_unreachable("LowerConstantPool: Relocation model other than static"
-                     " not supported.");
-
-  // Just return the SDValue with the constant pool address in it.
-  if (!ST->usingLargeMem())
-    return DAG.getNode(SPUISD::AFormAddr, Loc, AddrVT, PoolAddr, ZeroOffs);
-
-  // Large memory: build the address from its Hi and Lo parts.
-  SDValue HiPart = DAG.getNode(SPUISD::Hi, Loc, AddrVT, PoolAddr, ZeroOffs);
-  SDValue LoPart = DAG.getNode(SPUISD::Lo, Loc, AddrVT, PoolAddr, ZeroOffs);
-  return DAG.getNode(SPUISD::IndirectAddr, Loc, AddrVT, HiPart, LoPart);
-}
-
-//! Alternate entry point for generating the address of a constant pool entry
-/*!
-  Forwards to the file-local LowerConstantPool, passing the subtarget obtained
-  from the given target machine.
- */
-SDValue
-SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
-  const SPUSubtarget *Subtarget = TM.getSubtargetImpl();
-  return ::LowerConstantPool(Op, DAG, Subtarget);
-}
-
-//! Generate the address of a jump table.
-/*!
-  \param Op  The JumpTable node being lowered; its value type is used as the
-             pointer type of every node created here.
-  \param DAG The selection DAG in which the replacement nodes are built.
-  \param ST  Subtarget, consulted only for usingLargeMem().
-  \return    An AFormAddr node when the subtarget is not using large memory,
-             otherwise an IndirectAddr node combining the Hi and Lo parts.
-
-  Only the static relocation model is handled; any other model hits
-  llvm_unreachable.
- */
-static SDValue
-LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  const EVT AddrVT = Op.getValueType();
-  JumpTableSDNode *TableNode = cast<JumpTableSDNode>(Op);
-  SDValue TableAddr = DAG.getTargetJumpTable(TableNode->getIndex(), AddrVT);
-  SDValue ZeroOffs = DAG.getConstant(0, AddrVT);
-  // FIXME there is no actual debug info here
-  DebugLoc Loc = Op.getDebugLoc();
-
-  if (DAG.getTarget().getRelocationModel() != Reloc::Static)
-    llvm_unreachable("LowerJumpTable: Relocation model other than static"
-                     " not supported.");
-
-  if (!ST->usingLargeMem())
-    return DAG.getNode(SPUISD::AFormAddr, Loc, AddrVT, TableAddr, ZeroOffs);
-
-  // Large memory: build the address from its Hi and Lo parts.
-  SDValue HiPart = DAG.getNode(SPUISD::Hi, Loc, AddrVT, TableAddr, ZeroOffs);
-  SDValue LoPart = DAG.getNode(SPUISD::Lo, Loc, AddrVT, TableAddr, ZeroOffs);
-  return DAG.getNode(SPUISD::IndirectAddr, Loc, AddrVT, HiPart, LoPart);
-}
-
-//! Generate the address of a global value.
-/*!
-  \param Op  The GlobalAddress node being lowered; its value type is used as
-             the pointer type of every node created here.
-  \param DAG The selection DAG in which the replacement nodes are built.
-  \param ST  Subtarget, consulted only for usingLargeMem().
-  \return    An AFormAddr node when the subtarget is not using large memory,
-             otherwise an IndirectAddr node combining the Hi and Lo parts.
-
-  Only the static relocation model is handled; any other model is reported
-  through report_fatal_error.
- */
-static SDValue
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  EVT PtrVT = Op.getValueType();
-  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
-  const GlobalValue *GV = GSDN->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
-                                          PtrVT, GSDN->getOffset());
-  const TargetMachine &TM = DAG.getTarget();
-  SDValue Zero = DAG.getConstant(0, PtrVT);
-  // FIXME there is no actual debug info here
-  DebugLoc dl = Op.getDebugLoc();
-
-  if (TM.getRelocationModel() == Reloc::Static) {
-    if (!ST->usingLargeMem()) {
-      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
-    } else {
-      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
-      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
-      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
-    }
-  } else {
-    // The two literals are concatenated by the compiler, so the second one
-    // must start with a space to keep the words apart in the message.
-    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
-                       " not supported.");
-    /*NOTREACHED*/
-  }
-}
-
-//! Custom lower double precision floating point constants
-/*!
-  \param Op  The floating point constant node to lower.
-  \param DAG The selection DAG in which the replacement nodes are built.
-  \return    For f64, a VEC2PREFSLOT of a v2f64 bitcast of a v2i64
-             BUILD_VECTOR whose two elements hold the constant's bit pattern;
-             for every other type, an empty SDValue.
- */
-static SDValue
-LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
-  EVT VT = Op.getValueType();
-  // FIXME there is no actual debug info here
-  DebugLoc Loc = Op.getDebugLoc();
-
-  // Only f64 constants are custom lowered here.
-  if (!(VT == MVT::f64))
-    return SDValue();
-
-  ConstantFPSDNode *FPNode = cast<ConstantFPSDNode>(Op.getNode());
-
-  assert((FPNode != 0) &&
-         "LowerConstantFP: Node is not ConstantFPSDNode");
-
-  // Reinterpret the double as a 64-bit integer and splat it into both lanes.
-  uint64_t RawBits = DoubleToBits(FPNode->getValueAPF().convertToDouble());
-  SDValue IntBits = DAG.getConstant(RawBits, MVT::i64);
-  SDValue IntVec = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i64,
-                               IntBits, IntBits);
-  SDValue FPVec = DAG.getNode(ISD::BITCAST, Loc, MVT::v2f64, IntVec);
-  return DAG.getNode(SPUISD::VEC2PREFSLOT, Loc, VT, FPVec);
-}
-
-/// LowerFormalArguments - Build the DAG nodes that make the incoming
-/// arguments of the current function available as SDValues.
-///
-/// Argument locations are computed with CCC_SPU. One value per entry of
-/// \p Ins is appended to \p InVals: a CopyFromReg of a fresh virtual register
-/// for register locations, or a load from a fixed frame object otherwise.
-/// For vararg functions the remaining entries of the local ArgRegs table are
-/// additionally stored to consecutive fixed stack slots.
-///
-/// \param Chain    Incoming chain.
-/// \param CallConv Calling convention, forwarded to CCState.
-/// \param isVarArg True if the function takes variable arguments.
-/// \param Ins      Descriptions of the incoming arguments.
-/// \param dl       Debug location attached to the created nodes.
-/// \param DAG      The selection DAG being built.
-/// \param InVals   Output: one SDValue per incoming argument.
-/// \returns the chain to use after the arguments have been set up.
-SDValue
-SPUTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
-
- // Stack-passed arguments start at minStackSize() and advance one stack slot
- // at a time; ArgRegIdx counts the arguments that were found in registers.
- unsigned ArgOffset = SPUFrameLowering::minStackSize();
- unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
-
- // Add DAG nodes to load the arguments or copy them out of registers.
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- SDValue ArgVal;
- CCValAssign &VA = ArgLocs[ArgNo];
-
- if (VA.isRegLoc()) {
- const TargetRegisterClass *ArgRegClass;
-
- // Pick the register class matching the argument's value type; any type
- // not listed below is a fatal error.
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerFormalArguments Unhandled argument type: " +
- Twine(ObjectVT.getEVTString()));
- case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
- case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
- case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
- case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
- case MVT::i128:
- ArgRegClass = &SPU::GPRCRegClass;
- break;
- case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
- case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
- }
-
- // Mark the assigned physical register live-in and read the argument
- // through a new virtual register of the chosen class.
- unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++ArgRegIdx;
- } else {
- // We need to load the argument to a virtual register if we determined
- // above that we ran out of physical registers of the appropriate type
- // or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, false, 0);
- ArgOffset += StackSlotSize;
- }
-
- InVals.push_back(ArgVal);
- // Update the chain
- // NOTE(review): this takes operand 0 of the node just created (presumably
- // the chain that was passed in), not the node's own chain result - confirm
- // that this is intended.
- Chain = ArgVal.getOperand(0);
- }
-
- // vararg handling:
- if (isVarArg) {
- // FIXME: we should be able to query the argument registers from
- // tablegen generated code.
- static const uint16_t ArgRegs[] = {
- SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
- SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
- SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
- SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
- SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
- SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
- SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
- SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
- SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
- SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
- SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
- };
- // size of ArgRegs array
- // (R3 through R79 inclusive; must be kept in sync with the table above)
- const unsigned NumArgRegs = 77;
-
- // We will spill (79-3)+1 registers to the stack
- SmallVector<SDValue, 79-3+1> MemOps;
-
- // Create the frame slot
- // Starting at the first table entry not counted by ArgRegIdx, store each
- // remaining register (as v16i8) to its own fixed stack slot. The varargs
- // frame index is overwritten on every iteration, so after the loop it
- // refers to the slot created last.
- for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
- SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
- SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
- false, false, 0);
- // NOTE(review): operand 0 of the store is the chain it was given, so
- // Chain appears to be unchanged by this assignment - confirm.
- Chain = Store.getOperand(0);
- MemOps.push_back(Store);
-
- // Increment address by stack slot size for the next stored argument
- ArgOffset += StackSlotSize;
- }
- // Join all the spill stores into a single chain.
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
- }
-
- return Chain;
-}
-
-/// isLSAAddress - Return the immediate to use if the specified
-/// value is representable as a LSA address.
-///
-/// \param Op  Candidate address; only a ConstantSDNode can qualify.
-/// \param DAG DAG used to create the resulting i32 constant.
-/// \returns the node of an i32 constant holding the address shifted right by
-/// two bits, or 0 if \p Op is not a constant, has either of its low two bits
-/// set, or does not fit in a signed 18-bit field.
-static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
-  if (!C) return 0;
-
-  int Addr = C->getZExtValue();
-  if ((Addr & 3) != 0)        // Low 2 bits are implicitly zero.
-    return 0;
-
-  // Top 14 bits have to be sext of immediate, i.e. the value must fit in a
-  // signed 18-bit field. isInt<18> expresses the same range test as the
-  // shift-left/shift-right idiom without left-shifting a signed integer,
-  // whose overflow is undefined behaviour.
-  if (!isInt<18>(Addr))
-    return 0;
-
-  return DAG.getConstant(Addr >> 2, MVT::i32).getNode();
-}
-
-/// LowerCall - Build the DAG nodes for an outgoing call.
-///
-/// Outgoing argument locations are computed with CCC_SPU. Arguments are
-/// either copied to their assigned registers or stored relative to the stack
-/// pointer (R1); the callee is rewritten into a target address node where
-/// possible; an SPUISD::CALL node is emitted between CALLSEQ_START and
-/// CALLSEQ_END; finally the results are copied out of the return registers.
-///
-/// \param CLI    Call description (DAG, debug location, arguments, callee,
-///               chain, calling convention, ...). CLI.IsTailCall is forced to
-///               false.
-/// \param InVals Output: one SDValue per call result.
-/// \returns the chain after the call and the result copies.
-SDValue
-SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
-
- // CellSPU target does not yet support tail call optimization.
- isTailCall = false;
-
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-
- // NOTE(review): this is the number of assigned locations, not a count of
- // physical argument registers; it is compared against the argument index in
- // the loop below - confirm that the stack-store branch there is reachable.
- const unsigned NumArgRegs = ArgLocs.size();
-
-
- // Handy pointer type
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // Set up a copy of the stack pointer for use loading and storing any
- // arguments that may not fit in the registers available for argument
- // passing.
- SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
-
- // Figure out which arguments are going to go in registers, and which in
- // memory.
- unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
- unsigned ArgRegIdx = 0;
-
- // Keep track of registers passing arguments
- std::vector<std::pair<unsigned, SDValue> > RegsToPass;
- // And the arguments passed on the stack
- SmallVector<SDValue, 8> MemOpChains;
-
- for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
- SDValue Arg = OutVals[ArgRegIdx];
- CCValAssign &VA = ArgLocs[ArgRegIdx];
-
- // PtrOff will be used to store the current argument to the stack if a
- // register cannot be found for it.
- SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-
- // Every supported value type is handled identically; the switch only
- // rejects unexpected types.
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2i64:
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
- }
- }
-
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area, and parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP].
- // (The value computed here is the distance ArgOffset advanced beyond
- // minStackSize(), i.e. the bytes used by stack-passed arguments.)
- unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
-
- // Insert a call sequence start
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
- true));
-
- if (!MemOpChains.empty()) {
- // Adjust the stack pointer for the stack arguments.
- // (This joins the argument stores into one chain with a TokenFactor.)
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = SPUISD::CALL;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
-
- if (!ST->usingLargeMem()) {
- // Turn calls to targets that are defined (i.e., have bodies) into BRSL
- // style calls, otherwise, external symbols are BRASL calls. This assumes
- // that declared/defined symbols are in the same compilation unit and can
- // be reached through PC-relative jumps.
- //
- // NOTE:
- // This may be an unsafe assumption for JIT and really large compilation
- // units.
- if (GV->isDeclaration()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
- }
- } else {
- // "Large memory" mode: Turn all calls into indirect calls with a X-form
- // address pairs:
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
- }
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
- Callee.getValueType());
-
- if (!ST->usingLargeMem()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
- }
- } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
- // If this is an absolute destination address that appears to be a legal
- // local store address, use the munged value.
- Callee = SDValue(Dest, 0);
- }
-
- // Operand list of the call node: chain, callee, argument registers, and the
- // glue from the last copy-to-reg if there is one.
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
- // Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
- &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
- // The glue out of CALLSEQ_END is only needed when results are copied below.
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
-
- // If the function returns void, just return the chain.
- if (Ins.empty())
- return Chain;
-
- // Now handle the return value(s)
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
-
-
- // If the call has results, copy the values out of the ret val registers.
- // Each copy is threaded through both the chain (result 1) and the glue
- // (result 2) of the previous one.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign VA = RVLocs[i];
-
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
- InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-/// LowerReturn - Build the DAG nodes for a function return.
-///
-/// Return value locations are computed with RetCC_SPU. Each outgoing value
-/// is copied into its assigned register, the copies being linked through the
-/// chain and glue, and an SPUISD::RET_FLAG node terminates the sequence.
-///
-/// \param Chain    Incoming chain.
-/// \param CallConv Calling convention, forwarded to CCState.
-/// \param isVarArg Forwarded to CCState.
-/// \param Outs     Descriptions of the values being returned.
-/// \param OutVals  The values being returned, parallel to the locations.
-/// \param dl       Debug location attached to the created nodes.
-/// \param DAG      The selection DAG being built.
-/// \returns the RET_FLAG node.
-SDValue
-SPUTargetLowering::LowerReturn(SDValue Chain,
-                               CallingConv::ID CallConv, bool isVarArg,
-                               const SmallVectorImpl<ISD::OutputArg> &Outs,
-                               const SmallVectorImpl<SDValue> &OutVals,
-                               DebugLoc dl, SelectionDAG &DAG) const {
-
-  SmallVector<CCValAssign, 16> RetLocs;
-  CCState RetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                  getTargetMachine(), RetLocs, *DAG.getContext());
-  RetInfo.AnalyzeReturn(Outs, RetCC_SPU);
-
-  // If this is the first return lowered for this function, add the regs to the
-  // liveout set for the function.
-  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
-  if (MRI.liveout_empty()) {
-    for (unsigned Idx = 0; Idx != RetLocs.size(); ++Idx)
-      MRI.addLiveOut(RetLocs[Idx].getLocReg());
-  }
-
-  // Copy the result values into the output registers, gluing the copies
-  // together so they stay adjacent to the return.
-  SDValue Glue;
-  for (unsigned Idx = 0; Idx != RetLocs.size(); ++Idx) {
-    CCValAssign &Loc = RetLocs[Idx];
-    assert(Loc.isRegLoc() && "Can only return in registers!");
-    Chain = DAG.getCopyToReg(Chain, dl, Loc.getLocReg(), OutVals[Idx], Glue);
-    Glue = Chain.getValue(1);
-  }
-
-  // Without any copies there is no glue operand to attach.
-  if (!Glue.getNode())
-    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
-
-  return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Glue);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Vector related lowering:
-//===----------------------------------------------------------------------===//
-
-/// getVecImm - Return the constant shared by all non-undef operands of
-/// \p N, or 0 if the operands differ, are all undef, or the shared operand is
-/// not a ConstantSDNode.
-static ConstantSDNode *
-getVecImm(SDNode *N) {
-  SDValue Splat;   // Empty until the first non-undef operand is seen.
-
-  // Check to see if this buildvec has a single non-undef value in its elements.
-  for (unsigned Idx = 0, NumElts = N->getNumOperands(); Idx != NumElts; ++Idx) {
-    SDValue Elt = N->getOperand(Idx);
-    if (Elt.getOpcode() == ISD::UNDEF)
-      continue;
-
-    if (!Splat.getNode())
-      Splat = Elt;
-    else if (Splat != Elt)
-      return 0;
-  }
-
-  // Every operand was undef (or there were none).
-  if (!Splat.getNode())
-    return 0;
-
-  return dyn_cast<ConstantSDNode>(Splat);
-}
-
-/// get_vec_u18imm - Test if this vector is a vector filled with the same value
-/// and the value fits into an unsigned 18-bit constant, and if so, return the
-/// constant
-///
-/// For MVT::i64 the upper and lower 32-bit halves of the splat value must be
-/// equal, and the upper half is the value that is tested and returned.
-/// Returns an empty SDValue when the vector does not qualify.
-SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
-                            EVT ValueType) {
-  ConstantSDNode *Splat = getVecImm(N);
-  if (!Splat)
-    return SDValue();
-
-  uint64_t Imm = Splat->getZExtValue();
-  if (ValueType == MVT::i64) {
-    const uint32_t HiHalf = uint32_t(Imm >> 32);
-    const uint32_t LoHalf = uint32_t(Imm);
-    if (HiHalf != LoHalf)
-      return SDValue();
-    Imm >>= 32;
-  }
-
-  if (Imm > 0x3ffff)
-    return SDValue();
-
-  return DAG.getTargetConstant(Imm, ValueType);
-}
-
-/// get_vec_i16imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 16-bit constant, and if so, return the
-/// constant
-///
-/// For MVT::i64 the upper and lower 32-bit halves of the splat value must be
-/// equal, and the sign-extended value shifted right by 32 is what is tested
-/// and returned. Returns an empty SDValue when the vector does not qualify.
-SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
-                            EVT ValueType) {
-  ConstantSDNode *Splat = getVecImm(N);
-  if (!Splat)
-    return SDValue();
-
-  int64_t Imm = Splat->getSExtValue();
-  if (ValueType == MVT::i64) {
-    const uint64_t Bits = Splat->getZExtValue();
-    const uint32_t HiHalf = uint32_t(Bits >> 32);
-    const uint32_t LoHalf = uint32_t(Bits);
-    if (HiHalf != LoHalf)
-      return SDValue();
-    Imm = Imm >> 32;
-  }
-
-  // Same range as [-(1 << 15), (1 << 15) - 1].
-  if (!isInt<16>(Imm))
-    return SDValue();
-
-  return DAG.getTargetConstant(Imm, ValueType);
-}
-
-/// get_vec_i10imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 10-bit constant, and if so, return the
-/// constant
-///
-/// For MVT::i64 the upper and lower 32-bit halves of the splat value must be
-/// equal, and the sign-extended value shifted right by 32 is what is tested
-/// and returned. Returns an empty SDValue when the vector does not qualify.
-SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
-                            EVT ValueType) {
-  ConstantSDNode *Splat = getVecImm(N);
-  if (!Splat)
-    return SDValue();
-
-  int64_t Imm = Splat->getSExtValue();
-  if (ValueType == MVT::i64) {
-    const uint64_t Bits = Splat->getZExtValue();
-    const uint32_t HiHalf = uint32_t(Bits >> 32);
-    const uint32_t LoHalf = uint32_t(Bits);
-    if (HiHalf != LoHalf)
-      return SDValue();
-    Imm = Imm >> 32;
-  }
-
-  if (!isInt<10>(Imm))
-    return SDValue();
-
-  return DAG.getTargetConstant(Imm, ValueType);
-}
-
-/// get_vec_i8imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 8-bit constant, and if so, return the
-/// constant.
-///
-/// @note: The incoming vector is v16i8 because that's the only way we can load
-/// constant vectors. Thus, we test to see if the upper and lower bytes are the
-/// same value.
-///
-/// For MVT::i16 the low byte is returned when both bytes of the 16-bit splat
-/// value are equal; for MVT::i8 the value is returned when it fits in eight
-/// bits. Returns an empty SDValue otherwise.
-SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
-                           EVT ValueType) {
-  if (ConstantSDNode *CN = getVecImm(N)) {
-    int Value = (int) CN->getZExtValue();
-    // Compare the two bytes as unsigned quantities. Going through a signed
-    // short sign-extends the upper byte on the right shift, so byte values
-    // with the top bit set (e.g. 0x8181) could never compare equal.
-    if (ValueType == MVT::i16
-        && Value <= 0xffff                 /* truncated from uint64_t */
-        && ((Value >> 8) & 0xff) == (Value & 0xff))
-      return DAG.getTargetConstant(Value & 0xff, ValueType);
-    else if (ValueType == MVT::i8
-             && (Value & 0xff) == Value)
-      return DAG.getTargetConstant(Value, ValueType);
-  }
-
-  return SDValue();
-}
-
-/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
-/// and that value has no bits set outside bits 16..31 (i.e. it is unchanged
-/// by masking with 0xffff0000); if so, return the value shifted right by 16
-/// as a target constant. Only MVT::i32 and MVT::i64 are accepted; an empty
-/// SDValue is returned otherwise.
-SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
-                             EVT ValueType) {
-  ConstantSDNode *Splat = getVecImm(N);
-  if (!Splat)
-    return SDValue();
-
-  uint64_t Imm = Splat->getZExtValue();
-
-  bool UpperHalfwordOnly = false;
-  if (ValueType == MVT::i32)
-    UpperHalfwordOnly = ((unsigned) Imm & 0xffff0000) == (unsigned) Imm;
-  else if (ValueType == MVT::i64)
-    UpperHalfwordOnly = (Imm & 0xffff0000) == Imm;
-
-  if (!UpperHalfwordOnly)
-    return SDValue();
-
-  return DAG.getTargetConstant(Imm >> 16, ValueType);
-}
-
-/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
-///
-/// Returns the splat value, truncated to unsigned, as an i32 target constant,
-/// or an empty SDValue if the vector is not a splat of a single constant.
-SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
-  ConstantSDNode *Splat = getVecImm(N);
-  if (!Splat)
-    return SDValue();
-
-  const unsigned Imm = (unsigned) Splat->getZExtValue();
-  return DAG.getTargetConstant(Imm, MVT::i32);
-}
-
-/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
-///
-/// Returns the splat value as an i64 target constant, or an empty SDValue if
-/// the vector is not a splat of a single constant.
-SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
-  ConstantSDNode *Splat = getVecImm(N);
-  if (!Splat)
-    return SDValue();
-
-  // NOTE(review): the value is truncated to unsigned (dropping the upper
-  // 32 bits) before being wrapped in an i64 constant - confirm that this is
-  // intended.
-  const unsigned Imm = (unsigned) Splat->getZExtValue();
-  return DAG.getTargetConstant(Imm, MVT::i64);
-}
-
-//! Lower a BUILD_VECTOR instruction creatively:
-/*!
-  Only constant splats are handled. The splat is re-expressed as a
-  BUILD_VECTOR of integer constants (bitcast back for the floating point and
-  v16i8 cases); v2i64 is delegated to SPU::LowerV2I64Splat.
-
-  \param Op  The BUILD_VECTOR node to lower.
-  \param DAG The selection DAG in which the replacement nodes are built.
-  \return    The replacement value, or an empty SDValue if the node is not a
-             constant splat or its splat size exceeds the minimum considered.
- */
-static SDValue
-LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
-  EVT VecVT = Op.getValueType();
-  EVT ElemVT = VecVT.getVectorElementType();
-  DebugLoc Loc = Op.getDebugLoc();
-  BuildVectorSDNode *BVNode = dyn_cast<BuildVectorSDNode>(Op.getNode());
-  assert(BVNode != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
-
-  // Never look for splats narrower than 16 bits.
-  unsigned MinSplatBits = ElemVT.getSizeInBits();
-  MinSplatBits = (MinSplatBits < 16) ? 16 : MinSplatBits;
-
-  APInt SplatAP, UndefAP;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-
-  // Wasn't a constant vector
-  if (!BVNode->isConstantSplat(SplatAP, UndefAP, SplatBitSize,
-                               HasAnyUndefs, MinSplatBits))
-    return SDValue();
-  // ... or splat exceeded min
-  if (SplatBitSize > MinSplatBits)
-    return SDValue();
-
-  uint64_t SplatBits = SplatAP.getZExtValue();
-
-  switch (VecVT.getSimpleVT().SimpleTy) {
-  default:
-    report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
-                       Twine(VecVT.getEVTString()));
-    /*NOTREACHED*/
-  case MVT::v4f32: {
-    assert(SplatBitSize == 32
-           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
-    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
-    SDValue Word = DAG.getConstant(uint32_t(SplatBits), MVT::i32);
-    SDValue IntVec = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v4i32,
-                                 Word, Word, Word, Word);
-    return DAG.getNode(ISD::BITCAST, Loc, MVT::v4f32, IntVec);
-  }
-  case MVT::v2f64: {
-    assert(SplatBitSize == 64
-           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
-    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
-    SDValue DWord = DAG.getConstant(uint64_t(SplatBits), MVT::i64);
-    SDValue IntVec = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i64,
-                                 DWord, DWord);
-    return DAG.getNode(ISD::BITCAST, Loc, MVT::v2f64, IntVec);
-  }
-  case MVT::v16i8: {
-    // 8-bit constants have to be expanded to 16-bits. The splat value is
-    // used as-is (it is not OR-ed with a copy of itself shifted left by 8).
-    unsigned short Half = SplatBits;
-    SmallVector<SDValue, 8> Halves(8, DAG.getConstant(Half, MVT::i16));
-    SDValue HalfVec = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v8i16,
-                                  &Halves[0], Halves.size());
-    return DAG.getNode(ISD::BITCAST, Loc, VecVT, HalfVec);
-  }
-  case MVT::v8i16: {
-    unsigned short Half = SplatBits;
-    SmallVector<SDValue, 8> Halves(8, DAG.getConstant(Half, ElemVT));
-    return DAG.getNode(ISD::BUILD_VECTOR, Loc, VecVT,
-                       &Halves[0], Halves.size());
-  }
-  case MVT::v4i32: {
-    SDValue Word = DAG.getConstant(unsigned(SplatBits), ElemVT);
-    return DAG.getNode(ISD::BUILD_VECTOR, Loc, VecVT, Word, Word, Word, Word);
-  }
-  case MVT::v2i64: {
-    return SPU::LowerV2I64Splat(VecVT, DAG, SplatBits, Loc);
-  }
-  }
-}
-
-/*!
- */
-SDValue
-SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
- DebugLoc dl) {
- uint32_t upper = uint32_t(SplatVal >> 32);
- uint32_t lower = uint32_t(SplatVal);
-
- if (upper == lower) {
- // Magic constant that can be matched by IL, ILA, et. al.
- SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Val, Val, Val, Val));
- } else {
- bool upper_special, lower_special;
-
- // NOTE: This code creates common-case shuffle masks that can be easily
- // detected as common expressions. It is not attempting to create highly
- // specialized masks to replace any and all 0's, 0xff's and 0x80's.
-
- // Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
- lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
-
- // Both upper and lower are special, lower to a constant pool load:
- if (lower_special && upper_special) {
- SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
- SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- UpperVal, LowerVal, UpperVal, LowerVal);
- return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
- }
-
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
-
- // Create lower vector if not a special pattern
- if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- LO32C, LO32C, LO32C, LO32C));
- }
-
- // Create upper vector if not a special pattern
- if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- HI32C, HI32C, HI32C, HI32C));
- }
-
- // If either upper or lower are special, then the two input operands are
- // the same (basically, one of them is a "don't care")
- if (lower_special)
- LO32 = HI32;
- if (upper_special)
- HI32 = LO32;
-
- for (int i = 0; i < 4; ++i) {
- uint64_t val = 0;
- for (int j = 0; j < 4; ++j) {
- SDValue V;
- bool process_upper, process_lower;
- val <<= 8;
- process_upper = (upper_special && (i & 1) == 0);
- process_lower = (lower_special && (i & 1) == 1);
-
- if (process_upper || process_lower) {
- if ((process_upper && upper == 0)
- || (process_lower && lower == 0))
- val |= 0x80;
- else if ((process_upper && upper == 0xffffffff)
- || (process_lower && lower == 0xffffffff))
- val |= 0xc0;
- else if ((process_upper && upper == 0x80000000)
- || (process_lower && lower == 0x80000000))
- val |= (j == 0 ? 0xe0 : 0x80);
- } else
- val |= i * 4 + j + ((i & 1) * 16);
- }
-
- ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
- }
-
- return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
- }
-}
-
-/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
-/// which the Cell can operate. The code inspects V3 to ascertain whether the
-/// permutation vector, V3, is monotonically increasing with one "exception"
-/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
-/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
-/// In either case, the net result is going to eventually invoke SHUFB to
-/// permute/shuffle the bytes from V1 and V2.
-/// \note
-/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
-/// control word for byte/halfword/word insertion. This takes care of a single
-/// element move from V2 into V1.
-/// \note
-/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
-
- // If we have a single element being moved from V1 to V2, this can be handled
- // using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element, and the source
- // slot of the element to move must be the same as the destination.
- EVT VecVT = V1.getValueType();
- EVT EltVT = VecVT.getVectorElementType();
- unsigned EltsFromV2 = 0;
- unsigned V2EltOffset = 0;
- unsigned V2EltIdx0 = 0;
- unsigned CurrElt = 0;
- unsigned MaxElts = VecVT.getVectorNumElements();
- unsigned PrevElt = 0;
- bool monotonic = true;
- bool rotate = true;
- int rotamt=0;
- EVT maskVT; // which of the c?d instructions to use
-
- if (EltVT == MVT::i8) {
- V2EltIdx0 = 16;
- maskVT = MVT::v16i8;
- } else if (EltVT == MVT::i16) {
- V2EltIdx0 = 8;
- maskVT = MVT::v8i16;
- } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
- V2EltIdx0 = 4;
- maskVT = MVT::v4i32;
- } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
- V2EltIdx0 = 2;
- maskVT = MVT::v2i64;
- } else
- llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
-
- for (unsigned i = 0; i != MaxElts; ++i) {
- if (SVN->getMaskElt(i) < 0)
- continue;
-
- unsigned SrcElt = SVN->getMaskElt(i);
-
- if (monotonic) {
- if (SrcElt >= V2EltIdx0) {
- // TODO: optimize for the monotonic case when several consecutive
- // elements are taken form V2. Do we ever get such a case?
- if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
- V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
- else
- monotonic = false;
- ++EltsFromV2;
- } else if (CurrElt != SrcElt) {
- monotonic = false;
- }
-
- ++CurrElt;
- }
-
- if (rotate) {
- if (PrevElt > 0 && SrcElt < MaxElts) {
- if ((PrevElt == SrcElt - 1)
- || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
- PrevElt = SrcElt;
- } else {
- rotate = false;
- }
- } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
- // First time or after a "wrap around"
- rotamt = SrcElt-i;
- PrevElt = SrcElt;
- } else {
- // This isn't a rotation, takes elements from vector 2
- rotate = false;
- }
- }
- }
-
- if (EltsFromV2 == 1 && monotonic) {
- // Compute mask and shuffle
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
- // R1 ($sp) is used here only as it is guaranteed to have last bits zero
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2EltOffset, MVT::i32));
- SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
- maskVT, Pointer);
-
- // Use shuffle mask in SHUFB synthetic instruction:
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
- ShufMaskOp);
- } else if (rotate) {
- if (rotamt < 0)
- rotamt +=MaxElts;
- rotamt *= EltVT.getSizeInBits()/8;
- return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
- V1, DAG.getConstant(rotamt, MVT::i16));
- } else {
- // Convert the SHUFFLE_VECTOR mask's input element units to the
- // actual bytes.
- unsigned BytesPerElement = EltVT.getSizeInBits()/8;
-
- SmallVector<SDValue, 16> ResultMask;
- for (unsigned i = 0, e = MaxElts; i != e; ++i) {
- unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
-
- for (unsigned j = 0; j < BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
- }
- SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
- }
-}
-
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
- DebugLoc dl = Op.getDebugLoc();
-
- if (Op0.getNode()->getOpcode() == ISD::Constant) {
- // For a constant, build the appropriate constant vector, which will
- // eventually simplify to a vector register load.
-
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
- SmallVector<SDValue, 16> ConstVecValues;
- EVT VT;
- size_t n_copies;
-
- // Create a constant vector:
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected constant value type in "
- "LowerSCALAR_TO_VECTOR");
- case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
- case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
- case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
- case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
- case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
- case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
- }
-
- SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
- for (size_t j = 0; j < n_copies; ++j)
- ConstVecValues.push_back(CValue);
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
- &ConstVecValues[0], ConstVecValues.size());
- } else {
- // Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
- }
- }
-}
-
-static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- SDValue N = Op.getOperand(0);
- SDValue Elt = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- SDValue retval;
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- // Constant argument:
- int EltNo = (int) C->getZExtValue();
-
- // sanity checks:
- if (VT == MVT::i8 && EltNo >= 16)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == MVT::i16 && EltNo >= 8)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == MVT::i32 && EltNo >= 4)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
- else if (VT == MVT::i64 && EltNo >= 2)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
-
- if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
- // i32 and i64: Element 0 is the preferred slot
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
- }
-
- // Need to generate shuffle mask and extract:
- int prefslot_begin = -1, prefslot_end = -1;
- int elt_byte = EltNo * VT.getSizeInBits() / 8;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- prefslot_begin = prefslot_end = 3;
- break;
- }
- case MVT::i16: {
- prefslot_begin = 2; prefslot_end = 3;
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- prefslot_begin = 0; prefslot_end = 3;
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- prefslot_begin = 0; prefslot_end = 7;
- break;
- }
- }
-
- assert(prefslot_begin != -1 && prefslot_end != -1 &&
- "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
-
- unsigned int ShufBytes[16] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
- for (int i = 0; i < 16; ++i) {
- // zero fill uppper part of preferred slot, don't care about the
- // other slots:
- unsigned int mask_val;
- if (i <= prefslot_end) {
- mask_val =
- ((i < prefslot_begin)
- ? 0x80
- : elt_byte + (i - prefslot_begin));
-
- ShufBytes[i] = mask_val;
- } else
- ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
- }
-
- SDValue ShufMask[4];
- for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
- unsigned bidx = i * 4;
- unsigned int bits = ((ShufBytes[bidx] << 24) |
- (ShufBytes[bidx+1] << 16) |
- (ShufBytes[bidx+2] << 8) |
- ShufBytes[bidx+3]);
- ShufMask[i] = DAG.getConstant(bits, MVT::i32);
- }
-
- SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
- N, N, ShufMaskVec));
- } else {
- // Variable index: Rotate the requested element into slot 0, then replicate
- // slot 0 across the vector
- EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector()) {
- report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
- "vector type!");
- }
-
- // Make life easier by making sure the index is zero-extended to i32
- if (Elt.getValueType() != MVT::i32)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
-
- // Scale the index to a bit/byte shift quantity
- APInt scaleFactor =
- APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
- unsigned scaleShift = scaleFactor.logBase2();
- SDValue vecShift;
-
- if (scaleShift > 0) {
- // Scale the shift factor:
- Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
- DAG.getConstant(scaleShift, MVT::i32));
- }
-
- vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
-
- // Replicate the bytes starting at byte 0 across the entire vector (for
- // consistency with the notion of a unified register set)
- SDValue replicate;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
- "type");
- /*NOTREACHED*/
- case MVT::i8: {
- SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i16: {
- SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
- SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- loFactor, hiFactor, loFactor, hiFactor);
- break;
- }
- }
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- vecShift, vecShift, replicate));
- }
-
- return retval;
-}
-
-static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- SDValue VecOp = Op.getOperand(0);
- SDValue ValOp = Op.getOperand(1);
- SDValue IdxOp = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT eltVT = ValOp.getValueType();
-
- // use 0 when the lane to insert to is 'undef'
- int64_t Offset=0;
- if (IdxOp.getOpcode() != ISD::UNDEF) {
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
- }
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Use $sp ($1) because it's always 16-byte aligned and it's available:
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Offset, PtrVT));
- // widen the mask when dealing with half vectors
- EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
- 128/ VT.getVectorElementType().getSizeInBits());
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
-
- SDValue result =
- DAG.getNode(SPUISD::SHUFB, dl, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
- VecOp,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
-
- return result;
-}
-
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
- const TargetLowering &TLI)
-{
- SDValue N0 = Op.getOperand(0); // Everything has at least one operand
- DebugLoc dl = Op.getDebugLoc();
- EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
-
- assert(Op.getValueType() == MVT::i8);
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled i8 math operator");
- case ISD::ADD: {
- // 8-bit addition: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
-
- }
-
- case ISD::SUB: {
- // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::ROTR:
- case ISD::ROTL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- // Replicate lower 8-bits into upper 8:
- SDValue ExpandArg =
- DAG.getNode(ISD::OR, dl, MVT::i16, N0,
- DAG.getNode(ISD::SHL, dl, MVT::i16,
- N0, DAG.getConstant(8, MVT::i32)));
-
- // Truncate back down to i8
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
- }
- case ISD::SRL:
- case ISD::SHL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::ZERO_EXTEND;
-
- if (N1.getValueType().bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
-
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::SRA: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::SIGN_EXTEND;
-
- if (N1VT.bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::MUL: {
- SDValue N1 = Op.getOperand(1);
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- }
-}
-
-//! Lower byte immediate operations for v16i8 vectors:
-static SDValue
-LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
- SDValue ConstVec;
- SDValue Arg;
- EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
-
- ConstVec = Op.getOperand(0);
- Arg = Op.getOperand(1);
- if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- } else {
- ConstVec = Op.getOperand(1);
- Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- }
- }
- }
-
- if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
-
- if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- && minSplatBits <= SplatBitSize) {
- uint64_t SplatBits = APSplatBits.getZExtValue();
- SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
-
- SmallVector<SDValue, 16> tcVec;
- tcVec.assign(16, tc);
- return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
- }
- }
-
- // These operations (AND, OR, XOR) are legal, they just couldn't be custom
- // lowered. Return the operation, rather than a null SDValue.
- return Op;
-}
-
-//! Custom lowering for CTPOP (count population)
-/*!
- Custom lowering code that counts the number ones in the input
- operand. SPU has such an instruction, but it counts the number of
- ones per byte, which then have to be accumulated.
-*/
-static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
- }
-
- case MVT::i16: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i16);
- SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
- SDValue Shift1 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
-
- return DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- Tmp1, Shift1),
- Tmp1),
- Mask0);
- }
-
- case MVT::i32: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
- SDValue Shift1 = DAG.getConstant(16, MVT::i32);
- SDValue Shift2 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Comp1 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
- Shift1);
-
- SDValue Sum1 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
-
- SDValue Sum1_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
-
- SDValue Comp2 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
- Shift2);
- SDValue Sum2 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
-
- return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
- }
-
- case MVT::i64:
- break;
- }
-
- return SDValue();
-}
-
-//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
-/*!
- f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
- All conversions to i64 are expanded to a libcall.
- */
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
- || OpVT == MVT::i64) {
- // Convert f32 / f64 to i32 / i64 via libcall.
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::FP_TO_SINT)
- ? RTLIB::getFPTOSINT(Op0VT, OpVT)
- : RTLIB::getFPTOUINT(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
-/*!
- i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
- All conversions from i64 are expanded to a libcall.
- */
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
- || Op0VT == MVT::i64) {
- // Convert i32, i64 to f64 via libcall:
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::SINT_TO_FP)
- ? RTLIB::getSINTTOFP(Op0VT, OpVT)
- : RTLIB::getUINTTOFP(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SETCC
-/*!
- This handles MVT::f64 (double floating point) condition lowering
- */
-static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
- DebugLoc dl = Op.getDebugLoc();
- assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
-
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- EVT lhsVT = lhs.getValueType();
- assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
-
- EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
- APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- EVT IntVT(MVT::i64);
-
- // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
- // selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
- SDValue lhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64lhs, DAG.getConstant(32, MVT::i32)));
- SDValue lhsHi32abs =
- DAG.getNode(ISD::AND, dl, MVT::i32,
- lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue lhsLo32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
-
- // SETO and SETUO only use the lhs operand:
- if (CC->get() == ISD::SETO) {
- // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
- // SETUO
- APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- return DAG.getNode(ISD::XOR, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, lhsVT),
- ISD::SETUO),
- DAG.getConstant(ccResultAllOnes, ccResultVT));
- } else if (CC->get() == ISD::SETUO) {
- // Evaluates to true if Op0 is [SQ]NaN
- return DAG.getNode(ISD::AND, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhsHi32abs,
- DAG.getConstant(0x7ff00000, MVT::i32),
- ISD::SETGE),
- DAG.getSetCC(dl, ccResultVT,
- lhsLo32,
- DAG.getConstant(0, MVT::i32),
- ISD::SETGT));
- }
-
- SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
- SDValue rhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64rhs, DAG.getConstant(32, MVT::i32)));
-
- // If a value is negative, subtract from the sign magnitude constant:
- SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
-
- // Convert the sign-magnitude representation into 2's complement:
- SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- lhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
- SDValue lhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- lhsSelectMask, lhsSignMag2TC, i64lhs);
-
- SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- rhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
- SDValue rhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- rhsSelectMask, rhsSignMag2TC, i64rhs);
-
- unsigned compareOp;
-
- switch (CC->get()) {
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- compareOp = ISD::SETEQ; break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- compareOp = ISD::SETGT; break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- compareOp = ISD::SETGE; break;
- case ISD::SETOLT:
- case ISD::SETULT:
- compareOp = ISD::SETLT; break;
- case ISD::SETOLE:
- case ISD::SETULE:
- compareOp = ISD::SETLE; break;
- case ISD::SETUNE:
- case ISD::SETONE:
- compareOp = ISD::SETNE; break;
- default:
- report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
- }
-
- SDValue result =
- DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
- (ISD::CondCode) compareOp);
-
- if ((CC->get() & 0x8) == 0) {
- // Ordered comparison:
- SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
- rhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
-
- result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
- }
-
- return result;
-}
-
-//! Lower ISD::SELECT_CC
-/*!
- ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
- SELB instruction.
-
- \note Need to revisit this in the future: if the code path through the true
- and false value computations is longer than the latency of a branch (6
- cycles), then it would be more advantageous to branch and insert a new basic
- block and branch on the condition. However, this code does not make that
- assumption, given the simplisitc uses so far.
- */
-
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = Op.getValueType();
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- SDValue trueval = Op.getOperand(2);
- SDValue falseval = Op.getOperand(3);
- SDValue condition = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
-
- // NOTE: SELB's arguments: $rA, $rB, $mask
- //
- // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
- // where bits in $mask are 1. CCond will be inverted, having 1s where the
- // condition was true and 0s where the condition was false. Hence, the
- // arguments to SELB get reversed.
-
- // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
- // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
- // with another "cannot select select_cc" assert:
-
- SDValue compare = DAG.getNode(ISD::SETCC, dl,
- TLI.getSetCCResultType(Op.getValueType()),
- lhs, rhs, condition);
- return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
-}
-
-//! Custom lower ISD::TRUNCATE
-static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
-{
- // Type to truncate to
- EVT VT = Op.getValueType();
- MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to truncate from
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
- // Create shuffle mask, least significant doubleword of quadword
- unsigned maskHigh = 0x08090a0b;
- unsigned maskLow = 0x0c0d0e0f;
- // Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32),
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32));
-
- SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- Op0, Op0, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
- }
-
- return SDValue(); // Leave the truncate unmolested
-}
-
-/*!
- * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
- * algorithm is to duplicate the sign bit using rotmai to generate at
- * least one byte full of sign bits. Then propagate the "sign-byte" into
- * the leftmost words and the i64/i32 into the rightmost words using shufb.
- *
- * @param Op The sext operand
- * @param DAG The current DAG
- * @return The SDValue with the entire instruction sequence
- */
-static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
-{
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to extend to
- MVT OpVT = Op.getValueType().getSimpleVT();
-
- // Type to extend from
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType().getSimpleVT();
-
- // extend i8 & i16 via i32
- if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
- Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
- Op0VT = MVT::i32;
- }
-
- // The type to extend to needs to be a i128 and
- // the type to extend from needs to be i64 or i32.
- assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
- "LowerSIGN_EXTEND: input and/or output operand have wrong size");
- (void)OpVT;
-
- // Create shuffle mask
- unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
- unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
- unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask2, MVT::i32),
- DAG.getConstant(mask3, MVT::i32));
-
- // Word wise arithmetic right shift to generate at least one byte
- // that contains sign bits.
- MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
- SDValue sraVal = DAG.getNode(ISD::SRA,
- dl,
- mvt,
- DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
- DAG.getConstant(31, MVT::i32));
-
- // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
- SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- dl, Op0VT, Op0,
- DAG.getTargetConstant(
- SPU::GPRCRegClass.getID(),
- MVT::i32)), 0);
- // Shuffle bytes - Copy the sign bits into the upper 64 bits
- // and the input value into the lower 64 bits.
- SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- extended, sraVal, shufMask);
- return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
-}
-
-//! Custom (target-specific) lowering entry point
-/*!
- This is where LLVM's DAG selection process calls to do target-specific
- lowering of nodes.
- */
-SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
- unsigned Opc = (unsigned) Op.getOpcode();
- EVT VT = Op.getValueType();
-
- switch (Opc) {
- default: {
-#ifndef NDEBUG
- errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- Op.getNode()->dump();
-#endif
- llvm_unreachable(0);
- }
- case ISD::LOAD:
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD:
- case ISD::ZEXTLOAD:
- return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::STORE:
- return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantPool:
- return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantFP:
- return LowerConstantFP(Op, DAG);
-
- // i8, i64 math ops:
- case ISD::ADD:
- case ISD::SUB:
- case ISD::ROTR:
- case ISD::ROTL:
- case ISD::SRL:
- case ISD::SHL:
- case ISD::SRA: {
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break;
- }
-
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- return LowerFP_TO_INT(Op, DAG, *this);
-
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- return LowerINT_TO_FP(Op, DAG, *this);
-
- // Vector-related lowering.
- case ISD::BUILD_VECTOR:
- return LowerBUILD_VECTOR(Op, DAG);
- case ISD::SCALAR_TO_VECTOR:
- return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::VECTOR_SHUFFLE:
- return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT:
- return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::INSERT_VECTOR_ELT:
- return LowerINSERT_VECTOR_ELT(Op, DAG);
-
- // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- return LowerByteImmed(Op, DAG);
-
- // Vector and i8 multiply:
- case ISD::MUL:
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
-
- case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
-
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG, *this);
-
- case ISD::SETCC:
- return LowerSETCC(Op, DAG, *this);
-
- case ISD::TRUNCATE:
- return LowerTRUNCATE(Op, DAG);
-
- case ISD::SIGN_EXTEND:
- return LowerSIGN_EXTEND(Op, DAG);
- }
-
- return SDValue();
-}
-
-void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const
-{
-#if 0
- unsigned Opc = (unsigned) N->getOpcode();
- EVT OpVT = N->getValueType(0);
-
- switch (Opc) {
- default: {
- errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- N->dump();
- abort();
- /*NOTREACHED*/
- }
- }
-#endif
-
- /* Otherwise, return unchanged */
-}
-
-//===----------------------------------------------------------------------===//
-// Target Optimization Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
-{
-#if 0
- TargetMachine &TM = getTargetMachine();
-#endif
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0); // everything has at least one operand
- EVT NodeVT = N->getValueType(0); // The node's value type
- EVT Op0VT = Op0.getValueType(); // The first operand's result
- SDValue Result; // Initially, empty result
- DebugLoc dl = N->getDebugLoc();
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::ADD: {
- SDValue Op1 = N->getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::IndirectAddr
- || Op1.getOpcode() == SPUISD::IndirectAddr) {
- // Normalize the operands to reduce repeated code
- SDValue IndirectArg = Op0, AddArg = Op1;
-
- if (Op1.getOpcode() == SPUISD::IndirectAddr) {
- IndirectArg = Op1;
- AddArg = Op0;
- }
-
- if (isa<ConstantSDNode>(AddArg)) {
- ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
- SDValue IndOp1 = IndirectArg.getOperand(1);
-
- if (CN0->isNullValue()) {
- // (add (SPUindirect <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return IndirectArg;
- } else if (isa<ConstantSDNode>(IndOp1)) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
- int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
- SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
- << "), " << CN0->getSExtValue() << ")\n"
- << "With: (SPUindirect <arg>, "
- << combinedConst << ")\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- IndirectArg, combinedValue);
- }
- }
- }
- break;
- }
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: {
- if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
- // (any_extend (SPUextract_elt0 <arg>)) ->
- // (SPUextract_elt0 <arg>)
- // Types must match, however...
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\nReplace: ";
- N->dump(&DAG);
- errs() << "\nWith: ";
- Op0.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- return Op0;
- }
- break;
- }
- case SPUISD::IndirectAddr: {
- if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->isNullValue()) {
- // (SPUindirect (SPUaform <addr>, 0), 0) ->
- // (SPUaform <addr>, 0)
-
- DEBUG(errs() << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
-
- return Op0;
- }
- } else if (Op0.getOpcode() == ISD::ADD) {
- SDValue Op1 = N->getOperand(1);
- if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
- // (SPUindirect (add <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
- if (CN1->isNullValue()) {
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- Op0.getOperand(0), Op0.getOperand(1));
- }
- }
- }
- break;
- }
- case SPUISD::SHL_BITS:
- case SPUISD::SHL_BYTES:
- case SPUISD::ROTBYTES_LEFT: {
- SDValue Op1 = N->getOperand(1);
-
- // Kill degenerate vector shifts:
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
- if (CN->isNullValue()) {
- Result = Op0;
- }
- }
- break;
- }
- case SPUISD::PREFSLOT2VEC: {
- switch (Op0.getOpcode()) {
- default:
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND: {
- // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
- // <arg>
- // but only if the SPUprefslot2vec and <arg> types match.
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
- SDValue Op000 = Op00.getOperand(0);
- if (Op000.getValueType() == NodeVT) {
- Result = Op000;
- }
- }
- break;
- }
- case SPUISD::VEC2PREFSLOT: {
- // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
- // <arg>
- Result = Op0.getOperand(0);
- break;
- }
- }
- break;
- }
- }
-
- // Otherwise, return unchanged.
-#ifndef NDEBUG
- if (Result.getNode()) {
- DEBUG(errs() << "\nReplace.SPU: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
- }
-#endif
-
- return Result;
-}
-
-//===----------------------------------------------------------------------===//
-// Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-SPUTargetLowering::ConstraintType
-SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
- if (ConstraintLetter.size() == 1) {
- switch (ConstraintLetter[0]) {
- default: break;
- case 'b':
- case 'r':
- case 'f':
- case 'v':
- case 'y':
- return C_RegisterClass;
- }
- }
- return TargetLowering::getConstraintType(ConstraintLetter);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-SPUTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
- //FIXME: Seems like the supported constraint letters were just copied
- // from PPC, as the following doesn't correspond to the GCC docs.
- // I'm leaving it so until someone adds the corresponding lowering support.
- case 'b':
- case 'r':
- case 'f':
- case 'd':
- case 'v':
- case 'y':
- weight = CW_Register;
- break;
- }
- return weight;
-}
-
-std::pair<unsigned, const TargetRegisterClass*>
-SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
-{
- if (Constraint.size() == 1) {
- // GCC RS6000 Constraint Letters
- switch (Constraint[0]) {
- case 'b': // R1-R31
- case 'r': // R0-R31
- if (VT == MVT::i64)
- return std::make_pair(0U, &SPU::R64CRegClass);
- return std::make_pair(0U, &SPU::R32CRegClass);
- case 'f':
- if (VT == MVT::f32)
- return std::make_pair(0U, &SPU::R32FPRegClass);
- if (VT == MVT::f64)
- return std::make_pair(0U, &SPU::R64FPRegClass);
- break;
- case 'v':
- return std::make_pair(0U, &SPU::GPRCRegClass);
- }
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-//! Compute used/known bits for a SPU operand
-void
-SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth ) const {
-#if 0
- const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
-
- switch (Op.getOpcode()) {
- default:
- // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
- break;
- case CALL:
- case SHUFB:
- case SHUFFLE_MASK:
- case CNTB:
- case SPUISD::PREFSLOT2VEC:
- case SPUISD::LDRESULT:
- case SPUISD::VEC2PREFSLOT:
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_ROTL:
- case SPUISD::VEC_ROTR:
- case SPUISD::ROTBYTES_LEFT:
- case SPUISD::SELECT_MASK:
- case SPUISD::SELB:
- }
-#endif
-}
-
-unsigned
-SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth) const {
- switch (Op.getOpcode()) {
- default:
- return 1;
-
- case ISD::SETCC: {
- EVT VT = Op.getValueType();
-
- if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
- VT = MVT::i32;
- }
- return VT.getSizeInBits();
- }
- }
-}
-
-// LowerAsmOperandForConstraint
-void
-SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
- // Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
-}
-
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode.
-bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- Type *Ty) const {
- // SPU's addresses are 256K:
- return (V > -(1 << 18) && V < (1 << 18) - 1);
-}
-
-bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
- return false;
-}
-
-bool
-SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The SPU target isn't yet aware of offsets.
- return false;
-}
-
-// can we compare to Imm without writing it into a register?
-bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
- //ceqi, cgti, etc. all take s10 operand
- return isInt<10>(Imm);
-}
-
-bool
-SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type * ) const{
-
- // A-form: 18bit absolute address.
- if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
- return true;
-
- // D-form: reg + 14bit offset
- if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
- return true;
-
- // X-form: reg+reg
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
- return true;
-
- return false;
-}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
deleted file mode 100644
index 9f1599fa6f..0000000000
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ /dev/null
@@ -1,178 +0,0 @@
-//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Cell SPU uses to lower LLVM code into
-// a selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_ISELLOWERING_H
-#define SPU_ISELLOWERING_H
-
-#include "SPU.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-
-namespace llvm {
- namespace SPUISD {
- enum NodeType {
- // Start the numbering where the builting ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Pseudo instructions:
- RET_FLAG, ///< Return with flag, matched by bi instruction
-
- Hi, ///< High address component (upper 16)
- Lo, ///< Low address component (lower 16)
- PCRelAddr, ///< Program counter relative address
- AFormAddr, ///< A-form address (local store)
- IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
-
- LDRESULT, ///< Load result (value, chain)
- CALL, ///< CALL instruction
- SHUFB, ///< Vector shuffle (permute)
- SHUFFLE_MASK, ///< Shuffle mask
- CNTB, ///< Count leading ones in bytes
- PREFSLOT2VEC, ///< Promote scalar->vector
- VEC2PREFSLOT, ///< Extract element 0
- SHL_BITS, ///< Shift quad left, by bits
- SHL_BYTES, ///< Shift quad left, by bytes
- SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
- VEC_ROTL, ///< Vector rotate left
- VEC_ROTR, ///< Vector rotate right
- ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
- ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
- SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
- SELB, ///< Select bits -> (b & mask) | (a & ~mask)
- // Markers: These aren't used to generate target-dependent nodes, but
- // are used during instruction selection.
- ADD64_MARKER, ///< i64 addition marker
- SUB64_MARKER, ///< i64 subtraction marker
- MUL64_MARKER, ///< i64 multiply marker
- LAST_SPUISD ///< Last user-defined instruction
- };
- }
-
- //! Utility functions specific to CellSPU:
- namespace SPU {
- SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType);
- SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
- SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
-
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
- const SPUTargetMachine &TM);
- //! Simplify a EVT::v2i64 constant splat to CellSPU-ready form
- SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
- DebugLoc dl);
- }
-
- class SPUTargetMachine; // forward dec'l.
-
- class SPUTargetLowering :
- public TargetLowering
- {
- SPUTargetMachine &SPUTM;
-
- public:
- //! The venerable constructor
- /*!
- This is where the CellSPU backend sets operation handling (i.e., legal,
- custom, expand or promote.)
- */
- SPUTargetLowering(SPUTargetMachine &TM);
-
- //! Get the target machine
- SPUTargetMachine &getSPUTargetMachine() {
- return SPUTM;
- }
-
- /// getTargetNodeName() - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- /// getSetCCResultType - Return the ValueType for ISD::SETCC
- virtual EVT getSetCCResultType(EVT VT) const;
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
-
- //! Custom lowering hooks
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- //! Custom lowering hook for nodes with illegal result types.
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
-
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
-
- virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth = 0) const;
-
- ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
-
- /// isLegalAddressImmediate - Return true if the integer value can be used
- /// as the offset of the target addressing mode.
- virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
- virtual bool isLegalAddressImmediate(GlobalValue *) const;
-
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- virtual bool isLegalICmpImmediate(int64_t Imm) const;
-
- virtual bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const;
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
deleted file mode 100644
index b495537fc2..0000000000
--- a/lib/Target/CellSPU/SPUInstrBuilder.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file exposes functions that may be used with BuildMI from the
-// MachineInstrBuilder.h file to simplify generating frame and constant pool
-// references.
-//
-// For reference, the order of operands for memory references is:
-// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
-// Displacement.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_INSTRBUILDER_H
-#define SPU_INSTRBUILDER_H
-
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-
-namespace llvm {
-
-/// addFrameReference - This function is used to add a reference to the base of
-/// an abstract object on the stack frame of the current function. This
-/// reference has base register as the FrameIndex offset until it is resolved.
-/// This allows a constant offset to be specified as well...
-///
-inline const MachineInstrBuilder&
-addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
- bool mem = true) {
- if (mem)
- return MIB.addImm(Offset).addFrameIndex(FI);
- else
- return MIB.addFrameIndex(FI).addImm(Offset);
-}
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
deleted file mode 100644
index cd3f422143..0000000000
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ /dev/null
@@ -1,320 +0,0 @@
-//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//
-// Cell SPU instruction formats. Note that these are notationally similar to
-// PowerPC, like "A-Form". But the sizes of operands and fields differ.
-
-// This was kiped from the PPC instruction formats (seemed like a good idea...)
-
-class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
- : Instruction {
- field bits<32> Inst;
-
- let Namespace = "SPU";
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let AsmString = asmstr;
- let Itinerary = itin;
-}
-
-// RR Format
-class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin> {
- bits<7> RA;
- bits<7> RB;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = opcode;
- let Inst{11-17} = RB;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-let RB = 0 in {
- // RR Format, where RB is zeroed (dont care):
- class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-
- let RA = 0 in {
- // RR Format, where RA and RB are zeroed (dont care):
- // Used for reads from status control registers (see FPSCRRr32)
- class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
- }
-}
-
-let RT = 0 in {
- // RR Format, where RT is zeroed (don't care), or as the instruction handbook
- // says, "RT is a false target." Used in "Halt if" instructions
- class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RRR Format
-class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> RA;
- bits<7> RB;
- bits<7> RC;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-3} = opcode;
- let Inst{4-10} = RT;
- let Inst{11-17} = RB;
- let Inst{18-24} = RA;
- let Inst{25-31} = RC;
-}
-
-// RI7 Format
-class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> i7;
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = opcode;
- let Inst{11-17} = i7;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-// CVTIntFp Format
-class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-9} = opcode;
- let Inst{10-17} = 0;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-let RA = 0 in {
- class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-
- let RT = 0 in {
- // Branch instruction format (without D/E flag settings)
- class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-
- class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
- : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
- pattern>
- { }
-
- let RB = 0 in {
- // Return instruction (bi, branch indirect), RA is zero (LR):
- class RETForm<string asmstr, list<dag> pattern>
- : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
- pattern>
- { }
- }
- }
-}
-
-// Branch indirect external data forms:
-class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
- : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
-{
- bits<7> Rcalldest;
-
- let Pattern = pattern;
-
- let Inst{0-10} = 0b11010101100;
- let Inst{11} = 0;
- let Inst{12-13} = DE_flag;
- let Inst{14-17} = 0b0000;
- let Inst{18-24} = Rcalldest;
- let Inst{25-31} = 0b0000000;
-}
-
-// RI10 Format
-class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<10> i10;
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-7} = opcode;
- let Inst{8-17} = i10;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-// RI10 Format, where the constant is zero (or effectively ignored by the
-// SPU)
-let i10 = 0 in {
- class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RI10 Format, where RT is ignored.
-// This format is used primarily by the Halt If ... Immediate set of
-// instructions
-let RT = 0 in {
- class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
- { }
-}
-
-// RI16 Format
-class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<16> i16;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-8} = opcode;
- let Inst{9-24} = i16;
- let Inst{25-31} = RT;
-}
-
-// Specialized version of the RI16 Format for unconditional branch relative and
-// branch absolute, branch and set link. Note that for branch and set link, the
-// link register doesn't have to be $lr, but this is actually hard coded into
-// the instruction pattern.
-
-let RT = 0 in {
- class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- list<dag> pattern>
- : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-
- class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
- list<dag> pattern>
- : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
- { }
-}
-
-//===----------------------------------------------------------------------===//
-// Specialized versions of RI16:
-//===----------------------------------------------------------------------===//
-
-// RI18 Format
-class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, itin>
-{
- bits<18> i18;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-6} = opcode;
- let Inst{7-24} = i18;
- let Inst{25-31} = RT;
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction formats for intrinsics:
-//===----------------------------------------------------------------------===//
-
-// RI10 Format for v8i16 intrinsics
-class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
- !strconcat(opc, " $rT, $rA, $val"), itin,
- [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
- i16ImmSExt10:$val))] >;
-
-class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
- !strconcat(opc, " $rT, $rA, $val"), itin,
- [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
- i32ImmSExt10:$val))] >;
-
-// RR Format for v8i16 intrinsics
-class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- !strconcat(opc, " $rT, $rA, $rB"), itin,
- [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))] >;
-
-// RR Format for v4i32 intrinsics
-class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
- Intrinsic IntID> :
- RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- !strconcat(opc, " $rT, $rA, $rB"), itin,
- [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
- (v4i32 VECREG:$rB)))] >;
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions, like call frames:
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
- : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let AsmString = asmstr;
- let Pattern = pattern;
- let Inst{31-0} = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Branch hint formats
-//===----------------------------------------------------------------------===//
-// For hbrr and hbra
-class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
- : Instruction {
- field bits<32> Inst;
- bits<16>i16;
- bits<9>RO;
-
- let Namespace = "SPU";
- let InOperandList = IOL;
- let OutOperandList = (outs); //no output
- let AsmString = asmstr;
- let Itinerary = BranchHints;
-
- let Inst{0-6} = opcode;
- let Inst{7-8} = RO{8-7};
- let Inst{9-24} = i16;
- let Inst{25-31} = RO{6-0};
-}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
deleted file mode 100644
index b25a6397ec..0000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ /dev/null
@@ -1,449 +0,0 @@
-//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell SPU implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUInstrInfo.h"
-#include "SPUInstrBuilder.h"
-#include "SPUTargetMachine.h"
-#include "SPUHazardRecognizers.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define GET_INSTRINFO_CTOR
-#include "SPUGenInstrInfo.inc"
-
-using namespace llvm;
-
-namespace {
- //! Predicate for an unconditional branch instruction
- inline bool isUncondBranch(const MachineInstr *I) {
- unsigned opc = I->getOpcode();
-
- return (opc == SPU::BR
- || opc == SPU::BRA
- || opc == SPU::BI);
- }
-
- //! Predicate for a conditional branch instruction
- inline bool isCondBranch(const MachineInstr *I) {
- unsigned opc = I->getOpcode();
-
- return (opc == SPU::BRNZr32
- || opc == SPU::BRNZv4i32
- || opc == SPU::BRZr32
- || opc == SPU::BRZv4i32
- || opc == SPU::BRHNZr16
- || opc == SPU::BRHNZv8i16
- || opc == SPU::BRHZr16
- || opc == SPU::BRHZv8i16);
- }
-}
-
-SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
- : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
- TM(tm),
- RI(*TM.getSubtargetImpl(), *this)
-{ /* NOP */ }
-
-/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
-/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- const TargetInstrInfo *TII = TM->getInstrInfo();
- assert(TII && "No InstrInfo?");
- return new SPUHazardRecognizer(*TII);
-}
-
-unsigned
-SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SPU::LQDv16i8:
- case SPU::LQDv8i16:
- case SPU::LQDv4i32:
- case SPU::LQDv4f32:
- case SPU::LQDv2f64:
- case SPU::LQDr128:
- case SPU::LQDr64:
- case SPU::LQDr32:
- case SPU::LQDr16: {
- const MachineOperand MOp1 = MI->getOperand(1);
- const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm() && MOp2.isFI()) {
- FrameIndex = MOp2.getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- }
- return 0;
-}
-
-unsigned
-SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SPU::STQDv16i8:
- case SPU::STQDv8i16:
- case SPU::STQDv4i32:
- case SPU::STQDv4f32:
- case SPU::STQDv2f64:
- case SPU::STQDr128:
- case SPU::STQDr64:
- case SPU::STQDr32:
- case SPU::STQDr16:
- case SPU::STQDr8: {
- const MachineOperand MOp1 = MI->getOperand(1);
- const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm() && MOp2.isFI()) {
- FrameIndex = MOp2.getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- }
- return 0;
-}
-
-void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const
-{
- // We support cross register class moves for our aliases, such as R3 in any
- // reg class to any other reg class containing R3. This is required because
- // we instruction select bitconvert i64 -> f64 as a noop for example, so our
- // types have no specific meaning.
-
- BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-void
-SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == &SPU::GPRCRegClass)
- opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128;
- else if (RC == &SPU::R64CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
- else if (RC == &SPU::R64FPRegClass)
- opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64;
- else if (RC == &SPU::R32CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
- else if (RC == &SPU::R32FPRegClass)
- opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32;
- else if (RC == &SPU::R16CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16;
- else if (RC == &SPU::R8CRegClass)
- opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8;
- else if (RC == &SPU::VECREGRegClass)
- opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8;
- else
- llvm_unreachable("Unknown regclass!");
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
- addFrameReference(BuildMI(MBB, MI, DL, get(opc))
- .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
-}
-
-void
-SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
- if (RC == &SPU::GPRCRegClass)
- opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128;
- else if (RC == &SPU::R64CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
- else if (RC == &SPU::R64FPRegClass)
- opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64;
- else if (RC == &SPU::R32CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
- else if (RC == &SPU::R32FPRegClass)
- opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32;
- else if (RC == &SPU::R16CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16;
- else if (RC == &SPU::R8CRegClass)
- opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8;
- else if (RC == &SPU::VECREGRegClass)
- opc = isValidFrameIdx ? SPU::LQDv16i8 : SPU::LQXv16i8;
- else
- llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
- addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
-}
-
-//! Branch analysis
-/*!
- \note This code was kiped from PPC. There may be more branch analysis for
- CellSPU than what's currently done here.
- */
-bool
-SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (isUncondBranch(LastInst)) {
- // Check for jump tables
- if (!LastInst->getOperand(0).isMBB())
- return true;
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (isCondBranch(LastInst)) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(1).getMBB();
- DEBUG(errs() << "Pushing LastInst: ");
- DEBUG(LastInst->dump());
- Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
- Cond.push_back(LastInst->getOperand(0));
- return false;
- }
- // Otherwise, don't know what this is.
- return true;
- }
-
- // Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with a conditional and unconditional branch, handle it.
- if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
- TBB = SecondLastInst->getOperand(1).getMBB();
- DEBUG(errs() << "Pushing SecondLastInst: ");
- DEBUG(SecondLastInst->dump());
- Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-// search MBB for branch hint labels and branch hit ops
-static void removeHBR( MachineBasicBlock &MBB) {
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){
- if (I->getOpcode() == SPU::HBRA ||
- I->getOpcode() == SPU::HBR_LABEL){
- I=MBB.erase(I);
- if (I == MBB.end())
- break;
- }
- }
-}
-
-unsigned
-SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- removeHBR(MBB);
- if (I == MBB.begin())
- return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (!isCondBranch(I) && !isUncondBranch(I))
- return 0;
-
- // Remove the first branch.
- DEBUG(errs() << "Removing branch: ");
- DEBUG(I->dump());
- I->eraseFromParent();
- I = MBB.end();
- if (I == MBB.begin())
- return 1;
-
- --I;
- if (!(isCondBranch(I) || isUncondBranch(I)))
- return 1;
-
- // Remove the second branch.
- DEBUG(errs() << "Removing second branch: ");
- DEBUG(I->dump());
- I->eraseFromParent();
- return 2;
-}
-
-/** Find the optimal position for a hint branch instruction in a basic block.
- * This should take into account:
- * -the branch hint delays
- * -congestion of the memory bus
- * -dual-issue scheduling (i.e. avoid insertion of nops)
- * Current implementation is rather simplistic.
- */
-static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
-{
- MachineBasicBlock::iterator J = MBB.end();
- for( int i=0; i<8; i++) {
- if( J == MBB.begin() ) return J;
- J--;
- }
- return J;
-}
-
-unsigned
-SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "SPU branch conditions have two components!");
-
- MachineInstrBuilder MIB;
- //TODO: make a more accurate algorithm.
- bool haveHBR = MBB.size()>8;
-
- removeHBR(MBB);
- MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
- // Add a label just before the branch
- if (haveHBR)
- MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
-
- // One-way branch.
- if (FBB == 0) {
- if (Cond.empty()) {
- // Unconditional branch
- MIB = BuildMI(&MBB, DL, get(SPU::BR));
- MIB.addMBB(TBB);
-
- DEBUG(errs() << "Inserted one-way uncond branch: ");
- DEBUG((*MIB).dump());
-
- // basic blocks have just one branch so it is safe to add the hint a its
- if (haveHBR) {
- MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(TBB);
- }
- } else {
- // Conditional branch
- MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- MIB.addReg(Cond[1].getReg()).addMBB(TBB);
-
- if (haveHBR) {
- MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(TBB);
- }
-
- DEBUG(errs() << "Inserted one-way cond branch: ");
- DEBUG((*MIB).dump());
- }
- return 1;
- } else {
- MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
-
- // Two-way Conditional Branch.
- MIB.addReg(Cond[1].getReg()).addMBB(TBB);
- MIB2.addMBB(FBB);
-
- if (haveHBR) {
- MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
- MIB.addSym(branchLabel);
- MIB.addMBB(FBB);
- }
-
- DEBUG(errs() << "Inserted conditional branch: ");
- DEBUG((*MIB).dump());
- DEBUG(errs() << "part 2: ");
- DEBUG((*MIB2).dump());
- return 2;
- }
-}
-
-//! Reverses a branch's condition, returning false on success.
-bool
-SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
- const {
- // Pretty brainless way of inverting the condition, but it works, considering
- // there are only two conditions...
- static struct {
- unsigned Opc; //! The incoming opcode
- unsigned RevCondOpc; //! The reversed condition opcode
- } revconds[] = {
- { SPU::BRNZr32, SPU::BRZr32 },
- { SPU::BRNZv4i32, SPU::BRZv4i32 },
- { SPU::BRZr32, SPU::BRNZr32 },
- { SPU::BRZv4i32, SPU::BRNZv4i32 },
- { SPU::BRHNZr16, SPU::BRHZr16 },
- { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
- { SPU::BRHZr16, SPU::BRHNZr16 },
- { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
- };
-
- unsigned Opc = unsigned(Cond[0].getImm());
- // Pretty dull mapping between the two conditions that SPU can generate:
- for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
- if (revconds[i].Opc == Opc) {
- Cond[0].setImm(revconds[i].RevCondOpc);
- return false;
- }
- }
-
- return true;
-}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
deleted file mode 100644
index 85e5821aef..0000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the CellSPU implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_INSTRUCTIONINFO_H
-#define SPU_INSTRUCTIONINFO_H
-
-#include "SPU.h"
-#include "SPURegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "SPUGenInstrInfo.inc"
-
-namespace llvm {
- //! Cell SPU instruction information class
- class SPUInstrInfo : public SPUGenInstrInfo {
- SPUTargetMachine &TM;
- const SPURegisterInfo RI;
- public:
- explicit SPUInstrInfo(SPUTargetMachine &tm);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
-
- ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- //! Store a register to a stack slot, based on its register class.
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- //! Load a register from a stack slot, based on its register class.
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- //! Reverses a branch's condition, returning false on success.
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- };
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
deleted file mode 100644
index 117acd736a..0000000000
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ /dev/null
@@ -1,4484 +0,0 @@
-//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instructions:
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TODO Items (not urgent today, but would be nice, low priority)
-//
-// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
-// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
-// in 16-bit and 32-bit constants and reduce instruction count.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions:
-//===----------------------------------------------------------------------===//
-
-let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
- def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
- "${:comment} ADJCALLSTACKDOWN",
- [(callseq_start timm:$amt)]>;
- def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
- "${:comment} ADJCALLSTACKUP",
- [(callseq_end timm:$amt)]>;
- def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
- "$targ:\t${:comment}branch hint target",[ ]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Loads:
-// NB: The ordering is actually important, since the instruction selection
-// will try each of the instructions in sequence, i.e., the D-form first with
-// the 10-bit displacement, then the A-form with the 16 bit displacement, and
-// finally the X-form with the register-register.
-//===----------------------------------------------------------------------===//
-
-let canFoldAsLoad = 1 in {
- class LoadDFormVec<ValueType vectype>
- : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
- "lqd\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
- { }
-
- class LoadDForm<RegisterClass rclass>
- : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
- "lqd\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load dform_addr:$src))]>
- { }
-
- multiclass LoadDForms
- {
- def v16i8: LoadDFormVec<v16i8>;
- def v8i16: LoadDFormVec<v8i16>;
- def v4i32: LoadDFormVec<v4i32>;
- def v2i64: LoadDFormVec<v2i64>;
- def v4f32: LoadDFormVec<v4f32>;
- def v2f64: LoadDFormVec<v2f64>;
-
- def r128: LoadDForm<GPRC>;
- def r64: LoadDForm<R64C>;
- def r32: LoadDForm<R32C>;
- def f32: LoadDForm<R32FP>;
- def f64: LoadDForm<R64FP>;
- def r16: LoadDForm<R16C>;
- def r8: LoadDForm<R8C>;
- }
-
- class LoadAFormVec<ValueType vectype>
- : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
- { }
-
- class LoadAForm<RegisterClass rclass>
- : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
- "lqa\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load aform_addr:$src))]>
- { }
-
- multiclass LoadAForms
- {
- def v16i8: LoadAFormVec<v16i8>;
- def v8i16: LoadAFormVec<v8i16>;
- def v4i32: LoadAFormVec<v4i32>;
- def v2i64: LoadAFormVec<v2i64>;
- def v4f32: LoadAFormVec<v4f32>;
- def v2f64: LoadAFormVec<v2f64>;
-
- def r128: LoadAForm<GPRC>;
- def r64: LoadAForm<R64C>;
- def r32: LoadAForm<R32C>;
- def f32: LoadAForm<R32FP>;
- def f64: LoadAForm<R64FP>;
- def r16: LoadAForm<R16C>;
- def r8: LoadAForm<R8C>;
- }
-
- class LoadXFormVec<ValueType vectype>
- : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
- "lqx\t$rT, $src",
- LoadStore,
- [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
- { }
-
- class LoadXForm<RegisterClass rclass>
- : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
- "lqx\t$rT, $src",
- LoadStore,
- [(set rclass:$rT, (load xform_addr:$src))]>
- { }
-
- multiclass LoadXForms
- {
- def v16i8: LoadXFormVec<v16i8>;
- def v8i16: LoadXFormVec<v8i16>;
- def v4i32: LoadXFormVec<v4i32>;
- def v2i64: LoadXFormVec<v2i64>;
- def v4f32: LoadXFormVec<v4f32>;
- def v2f64: LoadXFormVec<v2f64>;
-
- def r128: LoadXForm<GPRC>;
- def r64: LoadXForm<R64C>;
- def r32: LoadXForm<R32C>;
- def f32: LoadXForm<R32FP>;
- def f64: LoadXForm<R64FP>;
- def r16: LoadXForm<R16C>;
- def r8: LoadXForm<R8C>;
- }
-
- defm LQA : LoadAForms;
- defm LQD : LoadDForms;
- defm LQX : LoadXForms;
-
-/* Load quadword, PC relative: Not much use at this point in time.
- Might be of use later for relocatable code. It's effectively the
- same as LQA, but uses PC-relative addressing.
- def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
- "lqr\t$rT, $disp", LoadStore,
- [(set VECREG:$rT, (load iaddr:$disp))]>;
- */
-}
-
-//===----------------------------------------------------------------------===//
-// Stores:
-//===----------------------------------------------------------------------===//
-class StoreDFormVec<ValueType vectype>
- : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
- "stqd\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), dform_addr:$src)]>
-{ }
-
-class StoreDForm<RegisterClass rclass>
- : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
- "stqd\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, dform_addr:$src)]>
-{ }
-
-multiclass StoreDForms
-{
- def v16i8: StoreDFormVec<v16i8>;
- def v8i16: StoreDFormVec<v8i16>;
- def v4i32: StoreDFormVec<v4i32>;
- def v2i64: StoreDFormVec<v2i64>;
- def v4f32: StoreDFormVec<v4f32>;
- def v2f64: StoreDFormVec<v2f64>;
-
- def r128: StoreDForm<GPRC>;
- def r64: StoreDForm<R64C>;
- def r32: StoreDForm<R32C>;
- def f32: StoreDForm<R32FP>;
- def f64: StoreDForm<R64FP>;
- def r16: StoreDForm<R16C>;
- def r8: StoreDForm<R8C>;
-}
-
-class StoreAFormVec<ValueType vectype>
- : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
- "stqa\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), aform_addr:$src)]>;
-
-class StoreAForm<RegisterClass rclass>
- : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
- "stqa\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, aform_addr:$src)]>;
-
-multiclass StoreAForms
-{
- def v16i8: StoreAFormVec<v16i8>;
- def v8i16: StoreAFormVec<v8i16>;
- def v4i32: StoreAFormVec<v4i32>;
- def v2i64: StoreAFormVec<v2i64>;
- def v4f32: StoreAFormVec<v4f32>;
- def v2f64: StoreAFormVec<v2f64>;
-
- def r128: StoreAForm<GPRC>;
- def r64: StoreAForm<R64C>;
- def r32: StoreAForm<R32C>;
- def f32: StoreAForm<R32FP>;
- def f64: StoreAForm<R64FP>;
- def r16: StoreAForm<R16C>;
- def r8: StoreAForm<R8C>;
-}
-
-class StoreXFormVec<ValueType vectype>
- : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
- "stqx\t$rT, $src",
- LoadStore,
- [(store (vectype VECREG:$rT), xform_addr:$src)]>
-{ }
-
-class StoreXForm<RegisterClass rclass>
- : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
- "stqx\t$rT, $src",
- LoadStore,
- [(store rclass:$rT, xform_addr:$src)]>
-{ }
-
-multiclass StoreXForms
-{
- def v16i8: StoreXFormVec<v16i8>;
- def v8i16: StoreXFormVec<v8i16>;
- def v4i32: StoreXFormVec<v4i32>;
- def v2i64: StoreXFormVec<v2i64>;
- def v4f32: StoreXFormVec<v4f32>;
- def v2f64: StoreXFormVec<v2f64>;
-
- def r128: StoreXForm<GPRC>;
- def r64: StoreXForm<R64C>;
- def r32: StoreXForm<R32C>;
- def f32: StoreXForm<R32FP>;
- def f64: StoreXForm<R64FP>;
- def r16: StoreXForm<R16C>;
- def r8: StoreXForm<R8C>;
-}
-
-defm STQD : StoreDForms;
-defm STQA : StoreAForms;
-defm STQX : StoreXForms;
-
-/* Store quadword, PC relative: Not much use at this point in time. Might
- be useful for relocatable code.
-def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
- "stqr\t$rT, $disp", LoadStore,
- [(store VECREG:$rT, iaddr:$disp)]>;
-*/
-
-//===----------------------------------------------------------------------===//
-// Generate Controls for Insertion:
-//===----------------------------------------------------------------------===//
-
-def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cbd\t$rT, $src", ShuffleOp,
- [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cbx\t$rT, $src", ShuffleOp,
- [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "chd\t$rT, $src", ShuffleOp,
- [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
- "chx\t$rT, $src", ShuffleOp,
- [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cwd\t$rT, $src", ShuffleOp,
- [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cwx\t$rT, $src", ShuffleOp,
- [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cwd\t$rT, $src", ShuffleOp,
- [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cwx\t$rT, $src", ShuffleOp,
- [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cdd\t$rT, $src", ShuffleOp,
- [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cdx\t$rT, $src", ShuffleOp,
- [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
- "cdd\t$rT, $src", ShuffleOp,
- [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
-
-def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
- "cdx\t$rT, $src", ShuffleOp,
- [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-
-//===----------------------------------------------------------------------===//
-// Constant formation:
-//===----------------------------------------------------------------------===//
-
-def ILHv8i16:
- RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
-
-def ILHr16:
- RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set R16C:$rT, immSExt16:$val)]>;
-
-// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
-// the right constant")
-def ILHr8:
- RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
- "ilh\t$rT, $val", ImmLoad,
- [(set R8C:$rT, immSExt8:$val)]>;
-
-// IL does sign extension!
-
-class ILInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
- ImmLoad, pattern>;
-
-class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmediateLoad
-{
- def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
- def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
-
- // TODO: Need v2f64, v4f32
-
- def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
- def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
- def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
- def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
-}
-
-defm IL : ImmediateLoad;
-
-class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
- ImmLoad, pattern>;
-
-class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILHUInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmLoadHalfwordUpper
-{
- def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
- def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
-
- def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
- def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
-
- // Loads the high portion of an address
- def hi: ILHURegInst<R32C, symbolHi, hi16>;
-
- // Used in custom lowering constant SFP loads:
- def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
-}
-
-defm ILHU : ImmLoadHalfwordUpper;
-
-// Immediate load address (can also be used to load 18-bit unsigned constants,
-// see the zext 16->32 pattern)
-
-class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
- RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
- LoadNOP, pattern>;
-
-class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
- ILAInst<(outs VECREG:$rT), (ins immtype:$val),
- [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
-
-class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
- ILAInst<(outs rclass:$rT), (ins immtype:$val),
- [(set rclass:$rT, xform:$val)]>;
-
-multiclass ImmLoadAddress
-{
- def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
- def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
-
- def r64: ILARegInst<R64C, u18imm_i64, imm18>;
- def r32: ILARegInst<R32C, u18imm, imm18>;
- def f32: ILARegInst<R32FP, f18imm, fpimm18>;
- def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
-
- def hi: ILARegInst<R32C, symbolHi, imm18>;
- def lo: ILARegInst<R32C, symbolLo, imm18>;
-
- def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
- [(set R32C:$rT, imm18:$val)]>;
-}
-
-defm ILA : ImmLoadAddress;
-
-// Immediate OR, Halfword Lower: The "other" part of loading large constants
-// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
-// Note that these are really two operand instructions, but they're encoded
-// as three operands with the first two arguments tied-to each other.
-
-class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
- RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
- ImmLoad, pattern>,
- RegConstraint<"$rS = $rT">,
- NoEncode<"$rS">;
-
-class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
- IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
- [/* no pattern */]>;
-
-class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
- IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
- [/* no pattern */]>;
-
-multiclass ImmOrHalfwordLower
-{
- def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
- def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
-
- def r32: IOHLRegInst<R32C, i32imm>;
- def f32: IOHLRegInst<R32FP, f32imm>;
-
- def lo: IOHLRegInst<R32C, symbolLo>;
-}
-
-defm IOHL: ImmOrHalfwordLower;
-
-// Form select mask for bytes using immediate, used in conjunction with the
-// SELB instruction:
-
-class FSMBIVec<ValueType vectype>:
- RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
- "fsmbi\t$rT, $val",
- SelectOp,
- [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
-
-multiclass FormSelectMaskBytesImm
-{
- def v16i8: FSMBIVec<v16i8>;
- def v8i16: FSMBIVec<v8i16>;
- def v4i32: FSMBIVec<v4i32>;
- def v2i64: FSMBIVec<v2i64>;
-}
-
-defm FSMBI : FormSelectMaskBytesImm;
-
-// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits
-class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
- FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMBVecInst<ValueType vectype>:
- FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT),
- (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskBits {
- def v16i8_r16: FSMBRegInst<R16C, v16i8>;
- def v16i8: FSMBVecInst<v16i8>;
-}
-
-defm FSMB: FormSelectMaskBits;
-
-// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
-// only 8-bits wide (even though it's input as 16-bits here)
-
-class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
- FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMHVecInst<ValueType vectype>:
- FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT),
- (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskHalfword {
- def v8i16_r16: FSMHRegInst<R16C, v8i16>;
- def v8i16: FSMHVecInst<v8i16>;
-}
-
-defm FSMH: FormSelectMaskHalfword;
-
-// fsm: Form select mask for words. Like the other fsm* instructions,
-// only the lower 4 bits of $rA are significant.
-
-class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
- pattern>;
-
-class FSMRegInst<ValueType vectype, RegisterClass rclass>:
- FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
-
-class FSMVecInst<ValueType vectype>:
- FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
-
-multiclass FormSelectMaskWord {
- def v4i32: FSMVecInst<v4i32>;
-
- def r32 : FSMRegInst<v4i32, R32C>;
- def r16 : FSMRegInst<v4i32, R16C>;
-}
-
-defm FSM : FormSelectMaskWord;
-
-// Special case when used for i64 math operations
-multiclass FormSelectMaskWord64 {
- def r32 : FSMRegInst<v2i64, R32C>;
- def r16 : FSMRegInst<v2i64, R16C>;
-}
-
-defm FSM64 : FormSelectMaskWord64;
-
-//===----------------------------------------------------------------------===//
-// Integer and Logical Operations:
-//===----------------------------------------------------------------------===//
-
-def AHv8i16:
- RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "ah\t$rT, $rA, $rB", IntegerOp,
- [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
-
-def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (AHv8i16 VECREG:$rA, VECREG:$rB)>;
-
-def AHr16:
- RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "ah\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
-
-def AHIvec:
- RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "ahi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
- v8i16SExt10Imm:$val))]>;
-
-def AHIr16:
- RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "ahi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
-
-// v4i32, i32 add instruction:
-
-class AInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00000011000, OOL, IOL,
- "a\t$rT, $rA, $rB", IntegerOp,
- pattern>;
-
-class AVecInst<ValueType vectype>:
- AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ARegInst<RegisterClass rclass>:
- AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
-
-multiclass AddInstruction {
- def v4i32: AVecInst<v4i32>;
- def v16i8: AVecInst<v16i8>;
- def r32: ARegInst<R32C>;
-}
-
-defm A : AddInstruction;
-
-class AIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00111000, OOL, IOL,
- "ai\t$rT, $rA, $val", IntegerOp,
- pattern>;
-
-class AIVecInst<ValueType vectype, PatLeaf immpred>:
- AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
-
-class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
- AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [/* no pattern */]>;
-
-class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
- AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
- [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
-
-// This is used to add epsilons to floating point numbers in the f32 fdiv code:
-class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
- AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
- [/* no pattern */]>;
-
-multiclass AddImmediate {
- def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
-
- def r32: AIRegInst<R32C, i32ImmSExt10>;
-
- def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
- def f32: AIFPInst<R32FP, i32ImmSExt10>;
-}
-
-defm AI : AddImmediate;
-
-def SFHvec:
- RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "sfh\t$rT, $rA, $rB", IntegerOp,
- [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
-def SFHr16:
- RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "sfh\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
-
-def SFHIvec:
- RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "sfhi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
- (v8i16 VECREG:$rA)))]>;
-
-def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "sfhi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
-
-def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- "sf\t$rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
-
-
-def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "sf\t$rT, $rA, $rB", IntegerOp,
- [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
-
-def SFIvec:
- RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- "sfi\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
- (v4i32 VECREG:$rA)))]>;
-
-def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
- (ins R32C:$rA, s10imm_i32:$val),
- "sfi\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
-
-// ADDX: only available in vector form, doesn't match a pattern.
-class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00000010110, OOL, IOL,
- "addx\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ADDXVecInst<ValueType vectype>:
- ADDXInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-class ADDXRegInst<RegisterClass rclass>:
- ADDXInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-multiclass AddExtended {
- def v2i64 : ADDXVecInst<v2i64>;
- def v4i32 : ADDXVecInst<v4i32>;
- def r64 : ADDXRegInst<R64C>;
- def r32 : ADDXRegInst<R32C>;
-}
-
-defm ADDX : AddExtended;
-
-// CG: Generate carry for add
-class CGInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01000011000, OOL, IOL,
- "cg\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class CGVecInst<ValueType vectype>:
- CGInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-class CGRegInst<RegisterClass rclass>:
- CGInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
-
-multiclass CarryGenerate {
- def v2i64 : CGVecInst<v2i64>;
- def v4i32 : CGVecInst<v4i32>;
- def r64 : CGRegInst<R64C>;
- def r32 : CGRegInst<R32C>;
-}
-
-defm CG : CarryGenerate;
-
-// SFX: Subract from, extended. This is used in conjunction with BG to subtract
-// with carry (borrow, in this case)
-class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000010110, OOL, IOL,
- "sfx\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class SFXVecInst<ValueType vectype>:
- SFXInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-class SFXRegInst<RegisterClass rclass>:
- SFXInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
- [/* no pattern */]>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-multiclass SubtractExtended {
- def v2i64 : SFXVecInst<v2i64>;
- def v4i32 : SFXVecInst<v4i32>;
- def r64 : SFXRegInst<R64C>;
- def r32 : SFXRegInst<R32C>;
-}
-
-defm SFX : SubtractExtended;
-
-// BG: only available in vector form, doesn't match a pattern.
-class BGInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01000010000, OOL, IOL,
- "bg\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class BGVecInst<ValueType vectype>:
- BGInst<(outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-class BGRegInst<RegisterClass rclass>:
- BGInst<(outs rclass:$rT),
- (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
-
-multiclass BorrowGenerate {
- def v4i32 : BGVecInst<v4i32>;
- def v2i64 : BGVecInst<v2i64>;
- def r64 : BGRegInst<R64C>;
- def r32 : BGRegInst<R32C>;
-}
-
-defm BG : BorrowGenerate;
-
-// BGX: Borrow generate, extended.
-def BGXvec:
- RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
- "bgx\t$rT, $rA, $rB", IntegerOp,
- []>,
- RegConstraint<"$rCarry = $rT">,
- NoEncode<"$rCarry">;
-
-// Halfword multiply variants:
-// N.B: These can be used to build up larger quantities (16x16 -> 32)
-
-def MPYv8i16:
- RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYr16:
- RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- "mpy\t$rT, $rA, $rB", IntegerMulDiv,
- [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
-
-// Unsigned 16-bit multiply:
-
-class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00110011110, OOL, IOL,
- "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
- pattern>;
-
-def MPYUv4i32:
- MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-def MPYUr16:
- MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
-
-def MPYUr32:
- MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-// mpyi: multiply 16 x s10imm -> 32 result.
-
-class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00101110, OOL, IOL,
- "mpyi\t$rT, $rA, $val", IntegerMulDiv,
- pattern>;
-
-def MPYIvec:
- MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
-
-def MPYIr16:
- MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
-
-// mpyui: same issues as other multiplies, plus, this doesn't match a
-// pattern... but may be used during target DAG selection or lowering
-
-class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b10101110, OOL, IOL,
- "mpyui\t$rT, $rA, $val", IntegerMulDiv,
- pattern>;
-
-def MPYUIvec:
- MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- []>;
-
-def MPYUIr16:
- MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- []>;
-
-// mpya: 16 x 16 + 16 -> 32 bit result
-class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b0011, OOL, IOL,
- "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
- pattern>;
-
-def MPYAv4i32:
- MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (v4i32 VECREG:$rT),
- (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))),
- (v4i32 VECREG:$rC)))]>;
-
-def MPYAr32:
- MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
- R32C:$rC))]>;
-
-def MPYAr32_sext:
- MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
- R32C:$rC))]>;
-
-def MPYAr32_sextinreg:
- MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
- [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
- (sext_inreg R32C:$rB, i16)),
- R32C:$rC))]>;
-
-// mpyh: multiply high, used to synthesize 32-bit multiplies
-class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10100011110, OOL, IOL,
- "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
- pattern>;
-
-def MPYHv4i32:
- MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
-def MPYHr32:
- MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-// mpys: multiply high and shift right (returns the top half of
-// a 16-bit multiply, sign extended to 32 bits.)
-
-class MPYSInst<dag OOL, dag IOL>:
- RRForm<0b11100011110, OOL, IOL,
- "mpys\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYSv4i32:
- MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYSr16:
- MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
-
-// mpyhh: multiply high-high (returns the 32-bit result from multiplying
-// the top 16 bits of the $rA, $rB)
-
-class MPYHHInst<dag OOL, dag IOL>:
- RRForm<0b01100011110, OOL, IOL,
- "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHv8i16:
- MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHr32:
- MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhha: Multiply high-high, add to $rT:
-
-class MPYHHAInst<dag OOL, dag IOL>:
- RRForm<0b01100010110, OOL, IOL,
- "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHAvec:
- MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHAr32:
- MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhhu: Multiply high-high, unsigned, e.g.:
-//
-// +-------+-------+ +-------+-------+ +---------+
-// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
-// +-------+-------+ +-------+-------+ +---------+
-//
-// where a0, b0 are the upper 16 bits of the 32-bit word
-
-class MPYHHUInst<dag OOL, dag IOL>:
- RRForm<0b01110011110, OOL, IOL,
- "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHUv4i32:
- MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHUr32:
- MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-// mpyhhau: Multiply high-high, unsigned
-
-class MPYHHAUInst<dag OOL, dag IOL>:
- RRForm<0b01110010110, OOL, IOL,
- "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
- [/* no pattern */]>;
-
-def MPYHHAUvec:
- MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
-
-def MPYHHAUr32:
- MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// clz: Count leading zeroes
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
- IntegerOp, pattern>;
-
-class CLZRegInst<RegisterClass rclass>:
- CLZInst<(outs rclass:$rT), (ins rclass:$rA),
- [(set rclass:$rT, (ctlz rclass:$rA))]>;
-
-class CLZVecInst<ValueType vectype>:
- CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
-
-multiclass CountLeadingZeroes {
- def v4i32 : CLZVecInst<v4i32>;
- def r32 : CLZRegInst<R32C>;
-}
-
-defm CLZ : CountLeadingZeroes;
-
-// cntb: Count ones in bytes (aka "population count")
-//
-// NOTE: This instruction is really a vector instruction, but the custom
-// lowering code uses it in unorthodox ways to support CTPOP for other
-// data types!
-
-def CNTBv16i8:
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
-
-def CNTBv8i16 :
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
-
-def CNTBv4i32 :
- RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "cntb\t$rT, $rA", IntegerOp,
- [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
-
-// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
-// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
-// slots 1-3.
-//
-// Note: This instruction "pairs" with the fsmb instruction for all of the
-// various types defined here.
-//
-// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
-// a vector or register.
-
-class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
-
-class GBBRegInst<RegisterClass rclass, ValueType vectype>:
- GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBBVecInst<ValueType vectype>:
- GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsFromBytes {
- def v16i8_r32: GBBRegInst<R32C, v16i8>;
- def v16i8_r16: GBBRegInst<R16C, v16i8>;
- def v16i8: GBBVecInst<v16i8>;
-}
-
-defm GBB: GatherBitsFromBytes;
-
-// gbh: Gather all low order bits from each halfword in $rA into a single
-// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
-// and slots 1-3 also set to 0.
-//
-// See notes for GBBInst, above.
-
-class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
- pattern>;
-
-class GBHRegInst<RegisterClass rclass, ValueType vectype>:
- GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBHVecInst<ValueType vectype>:
- GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsHalfword {
- def v8i16_r32: GBHRegInst<R32C, v8i16>;
- def v8i16_r16: GBHRegInst<R16C, v8i16>;
- def v8i16: GBHVecInst<v8i16>;
-}
-
-defm GBH: GatherBitsHalfword;
-
-// gb: Gather all low order bits from each word in $rA into a single
-// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
-// as well as slots 1-3.
-//
-// See notes for gbb, above.
-
-class GBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
- pattern>;
-
-class GBRegInst<RegisterClass rclass, ValueType vectype>:
- GBInst<(outs rclass:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-class GBVecInst<ValueType vectype>:
- GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
- [/* no pattern */]>;
-
-multiclass GatherBitsWord {
- def v4i32_r32: GBRegInst<R32C, v4i32>;
- def v4i32_r16: GBRegInst<R16C, v4i32>;
- def v4i32: GBVecInst<v4i32>;
-}
-
-defm GB: GatherBitsWord;
-
-// avgb: average bytes
-def AVGB:
- RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "avgb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// absdb: absolute difference of bytes
-def ABSDB:
- RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "absdb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// sumb: sum bytes into halfwords
-def SUMB:
- RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "sumb\t$rT, $rA, $rB", ByteOp,
- []>;
-
-// Sign extension operations:
-class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101010, OOL, IOL,
- "xsbh\t$rDst, $rSrc",
- IntegerOp, pattern>;
-
-class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
- XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
- pattern>;
-
-multiclass ExtendByteHalfword {
- def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
- [
- /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
- def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
- [(set R16C:$rDst, (sext R8C:$rSrc))]>;
- def r16: XSBHInRegInst<R16C,
- [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
-
- // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
- // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
- // pattern below). Intentionally doesn't match a pattern because we want the
- // sext 8->32 pattern to do the work for us, namely because we need the extra
- // XSHWr32.
- def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
-
- // Same as the 32-bit version, but for i64
- def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
-}
-
-defm XSBH : ExtendByteHalfword;
-
-// Sign extend halfwords to words:
-
-class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
- IntegerOp, pattern>;
-
-class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
- XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
- [(set (out_vectype VECREG:$rDest),
- (sext (in_vectype VECREG:$rSrc)))]>;
-
-class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
- XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
- pattern>;
-
-class XSHWRegInst<RegisterClass rclass>:
- XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
- [(set rclass:$rDest, (sext R16C:$rSrc))]>;
-
-multiclass ExtendHalfwordWord {
- def v4i32: XSHWVecInst<v8i16, v4i32>;
-
- def r16: XSHWRegInst<R32C>;
-
- def r32: XSHWInRegInst<R32C,
- [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
- def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
-}
-
-defm XSHW : ExtendHalfwordWord;
-
-// Sign-extend words to doublewords (32->64 bits)
-
-class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
- IntegerOp, pattern>;
-
-class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
- XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
- [/*(set (out_vectype VECREG:$rDst),
- (sext (out_vectype VECREG:$rSrc)))*/]>;
-
-class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
- XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
- [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
-
-multiclass ExtendWordToDoubleWord {
- def v2i64: XSWDVecInst<v4i32, v2i64>;
- def r64: XSWDRegInst<R32C, R64C>;
-
- def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
- [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
-}
-
-defm XSWD : ExtendWordToDoubleWord;
-
-// AND operations
-
-class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ANDVecInst<ValueType vectype>:
- ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ANDRegInst<RegisterClass rclass>:
- ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
-
-multiclass BitwiseAnd
-{
- def v16i8: ANDVecInst<v16i8>;
- def v8i16: ANDVecInst<v8i16>;
- def v4i32: ANDVecInst<v4i32>;
- def v2i64: ANDVecInst<v2i64>;
-
- def r128: ANDRegInst<GPRC>;
- def r64: ANDRegInst<R64C>;
- def r32: ANDRegInst<R32C>;
- def r16: ANDRegInst<R16C>;
- def r8: ANDRegInst<R8C>;
-
- //===---------------------------------------------
- // Special instructions to perform the fabs instruction
- def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* Intentionally does not match a pattern */]>;
-
- //===---------------------------------------------
-
- // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
- // quantities -- see 16->32 zext pattern.
- //
- // This pattern is somewhat artificial, since it might match some
- // compiler generated pattern but it is unlikely to do so.
-
- def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
-}
-
-defm AND : BitwiseAnd;
-
-
-def vnot_cell_conv : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
-
-// N.B.: vnot_cell_conv is one of those special target selection pattern
-// fragments,
-// in which we expect there to be a bit_convert on the constant. Bear in mind
-// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a
-// constant -1 vector.)
-
-class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
- ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (and (vectype VECREG:$rA),
- (vnot_frag (vectype VECREG:$rB))))]>;
-
-class ANDCRegInst<RegisterClass rclass>:
- ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
-
-multiclass AndComplement
-{
- def v16i8: ANDCVecInst<v16i8>;
- def v8i16: ANDCVecInst<v8i16>;
- def v4i32: ANDCVecInst<v4i32>;
- def v2i64: ANDCVecInst<v2i64>;
-
- def r128: ANDCRegInst<GPRC>;
- def r64: ANDCRegInst<R64C>;
- def r32: ANDCRegInst<R32C>;
- def r16: ANDCRegInst<R16C>;
- def r8: ANDCRegInst<R8C>;
-
- // Sometimes, the xor pattern has a bitcast constant:
- def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
-}
-
-defm ANDC : AndComplement;
-
-class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass AndByteImm
-{
- def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT),
- (and (v16i8 VECREG:$rA),
- (v16i8 v16i8U8Imm:$val)))]>;
-
- def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
-}
-
-defm ANDBI : AndByteImm;
-
-class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass AndHalfwordImm
-{
- def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
-
- def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
- [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
-
- // Zero-extend i8 to i16:
- def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
- [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
-}
-
-defm ANDHI : AndHalfwordImm;
-
-class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-multiclass AndWordImm
-{
- def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
-
- def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
-
- // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
- // pattern below.
- def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
- [(set R32C:$rT,
- (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
-
- // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
- // zext 16->32 pattern below.
- //
- // Note that this pattern is somewhat artificial, since it might match
- // something the compiler generates but is unlikely to occur in practice.
- def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
- [(set R32C:$rT,
- (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
-}
-
-defm ANDI : AndWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Bitwise OR group:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// Bitwise "or" (N.B.: These are also register-register copy instructions...)
-class ORInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ORVecInst<ValueType vectype>:
- ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class ORRegInst<RegisterClass rclass>:
- ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
-
-
-multiclass BitwiseOr
-{
- def v16i8: ORVecInst<v16i8>;
- def v8i16: ORVecInst<v8i16>;
- def v4i32: ORVecInst<v4i32>;
- def v2i64: ORVecInst<v2i64>;
-
- def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4f32 VECREG:$rT),
- (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
- (v4i32 VECREG:$rB)))))]>;
-
- def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v2f64 VECREG:$rT),
- (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
- (v2i64 VECREG:$rB)))))]>;
-
- def r128: ORRegInst<GPRC>;
- def r64: ORRegInst<R64C>;
- def r32: ORRegInst<R32C>;
- def r16: ORRegInst<R16C>;
- def r8: ORRegInst<R8C>;
-
- // OR instructions used to copy f32 and f64 registers.
- def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [/* no pattern */]>;
-
- def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- [/* no pattern */]>;
-}
-
-defm OR : BitwiseOr;
-
-//===----------------------------------------------------------------------===//
-// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
-//===----------------------------------------------------------------------===//
-def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
- (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
-
-def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
- (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
-
-def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
- (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
-
-def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
- (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
-
-def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
- (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
-
-def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
- (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
-
-def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
- (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
-
-def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
- (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
-
-def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
- (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
-
-def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
- (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
-
-def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
- (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
-
-def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
- (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
-
-// Load Register: This is an assembler alias for a bitwise OR of a register
-// against itself. It's here because it brings some clarity to assembly
-// language output.
-
-let hasCtrlDep = 1 in {
- class LRInst<dag OOL, dag IOL>
- : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = [/*no pattern*/];
-
- let Inst{0-10} = 0b10000010000; /* It's an OR operation */
- let Inst{11-17} = RA;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
- }
-
- class LRVecInst<ValueType vectype>:
- LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
-
- class LRRegInst<RegisterClass rclass>:
- LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
-
- multiclass LoadRegister {
- def v2i64: LRVecInst<v2i64>;
- def v2f64: LRVecInst<v2f64>;
- def v4i32: LRVecInst<v4i32>;
- def v4f32: LRVecInst<v4f32>;
- def v8i16: LRVecInst<v8i16>;
- def v16i8: LRVecInst<v16i8>;
-
- def r128: LRRegInst<GPRC>;
- def r64: LRRegInst<R64C>;
- def f64: LRRegInst<R64FP>;
- def r32: LRRegInst<R32C>;
- def f32: LRRegInst<R32FP>;
- def r16: LRRegInst<R16C>;
- def r8: LRRegInst<R8C>;
- }
-
- defm LR: LoadRegister;
-}
-
-// ORC: Bitwise "or" with complement (c = a | ~b)
-
-class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class ORCVecInst<ValueType vectype>:
- ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- (vnot (vectype VECREG:$rB))))]>;
-
-class ORCRegInst<RegisterClass rclass>:
- ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
-
-multiclass BitwiseOrComplement
-{
- def v16i8: ORCVecInst<v16i8>;
- def v8i16: ORCVecInst<v8i16>;
- def v4i32: ORCVecInst<v4i32>;
- def v2i64: ORCVecInst<v2i64>;
-
- def r128: ORCRegInst<GPRC>;
- def r64: ORCRegInst<R64C>;
- def r32: ORCRegInst<R32C>;
- def r16: ORCRegInst<R16C>;
- def r8: ORCRegInst<R8C>;
-}
-
-defm ORC : BitwiseOrComplement;
-
-// OR byte immediate
-class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
- ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
- (vectype immpred:$val)))]>;
-
-multiclass BitwiseOrByteImm
-{
- def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
-
- def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
-}
-
-defm ORBI : BitwiseOrByteImm;
-
-// OR halfword immediate
-class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
- ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- immpred:$val))]>;
-
-multiclass BitwiseOrHalfwordImm
-{
- def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
-
- def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
- [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
-
- // Specialized ORHI form used to promote 8-bit registers to 16-bit
- def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
- [(set R16C:$rT, (or (anyext R8C:$rA),
- i16ImmSExt10:$val))]>;
-}
-
-defm ORHI : BitwiseOrHalfwordImm;
-
-class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-class ORIVecInst<ValueType vectype, PatLeaf immpred>:
- ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
- immpred:$val))]>;
-
-// Bitwise "or" with immediate
-multiclass BitwiseOrImm
-{
- def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
-
- def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
-
- // i16i32: hacked version of the ori instruction to extend 16-bit quantities
- // to 32-bit quantities. used exclusively to match "anyext" conversions (vide
- // infra "anyext 16->32" pattern.)
- def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or (anyext R16C:$rA),
- i32ImmSExt10:$val))]>;
-
- // i8i32: Hacked version of the ORI instruction to extend 16-bit quantities
- // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
- // infra "anyext 16->32" pattern.)
- def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (or (anyext R8C:$rA),
- i32ImmSExt10:$val))]>;
-}
-
-defm ORI : BitwiseOrImm;
-
-// ORX: "or" across the vector: or's $rA's word slots leaving the result in
-// $rT[0], slots 1-3 are zeroed.
-//
-// FIXME: Needs to match an intrinsic pattern.
-def ORXv4i32:
- RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "orx\t$rT, $rA, $rB", IntegerOp,
- []>;
-
-// XOR:
-
-class XORInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class XORVecInst<ValueType vectype>:
- XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
- (vectype VECREG:$rB)))]>;
-
-class XORRegInst<RegisterClass rclass>:
- XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
-
-multiclass BitwiseExclusiveOr
-{
- def v16i8: XORVecInst<v16i8>;
- def v8i16: XORVecInst<v8i16>;
- def v4i32: XORVecInst<v4i32>;
- def v2i64: XORVecInst<v2i64>;
-
- def r128: XORRegInst<GPRC>;
- def r64: XORRegInst<R64C>;
- def r32: XORRegInst<R32C>;
- def r16: XORRegInst<R16C>;
- def r8: XORRegInst<R8C>;
-
- // XOR instructions used to negate f32 and f64 quantities.
-
- def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* no pattern */]>;
-
- def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
- [/* no pattern */]>;
-
- def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern, see fneg{32,64} */]>;
-}
-
-defm XOR : BitwiseExclusiveOr;
-
-//==----------------------------------------------------------
-
-class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
- IntegerOp, pattern>;
-
-multiclass XorByteImm
-{
- def v16i8:
- XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
-
- def r8:
- XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
- [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
-}
-
-defm XORBI : XorByteImm;
-
-def XORHIv8i16:
- RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
- "xorhi\t$rT, $rA, $val", IntegerOp,
- [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
- v8i16SExt10Imm:$val))]>;
-
-def XORHIr16:
- RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- "xorhi\t$rT, $rA, $val", IntegerOp,
- [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
-
-def XORIv4i32:
- RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
- "xori\t$rT, $rA, $val", IntegerOp,
- [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
- v4i32SExt10Imm:$val))]>;
-
-def XORIr32:
- RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- "xori\t$rT, $rA, $val", IntegerOp,
- [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
-
-// NAND:
-
-class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class NANDVecInst<ValueType vectype>:
- NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
- (vectype VECREG:$rB))))]>;
-class NANDRegInst<RegisterClass rclass>:
- NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitwiseNand
-{
- def v16i8: NANDVecInst<v16i8>;
- def v8i16: NANDVecInst<v8i16>;
- def v4i32: NANDVecInst<v4i32>;
- def v2i64: NANDVecInst<v2i64>;
-
- def r128: NANDRegInst<GPRC>;
- def r64: NANDRegInst<R64C>;
- def r32: NANDRegInst<R32C>;
- def r16: NANDRegInst<R16C>;
- def r8: NANDRegInst<R8C>;
-}
-
-defm NAND : BitwiseNand;
-
-// NOR:
-
-class NORInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class NORVecInst<ValueType vectype>:
- NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
- (vectype VECREG:$rB))))]>;
-class NORRegInst<RegisterClass rclass>:
- NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitwiseNor
-{
- def v16i8: NORVecInst<v16i8>;
- def v8i16: NORVecInst<v8i16>;
- def v4i32: NORVecInst<v4i32>;
- def v2i64: NORVecInst<v2i64>;
-
- def r128: NORRegInst<GPRC>;
- def r64: NORRegInst<R64C>;
- def r32: NORRegInst<R32C>;
- def r16: NORRegInst<R16C>;
- def r8: NORRegInst<R8C>;
-}
-
-defm NOR : BitwiseNor;
-
-// Select bits:
-class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
- IntegerOp, pattern>;
-
-class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
- (and (vnot_frag (vectype VECREG:$rC)),
- (vectype VECREG:$rA))))]>;
-
-class SELBVecVCondInst<ValueType vectype>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (vectype VECREG:$rT),
- (select (vectype VECREG:$rC),
- (vectype VECREG:$rB),
- (vectype VECREG:$rA)))]>;
-
-class SELBVecCondInst<ValueType vectype>:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
- [(set (vectype VECREG:$rT),
- (select R32C:$rC,
- (vectype VECREG:$rB),
- (vectype VECREG:$rA)))]>;
-
-class SELBRegInst<RegisterClass rclass>:
- SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
- [(set rclass:$rT,
- (or (and rclass:$rB, rclass:$rC),
- (and rclass:$rA, (not rclass:$rC))))]>;
-
-class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
- SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
- [(set rclass:$rT,
- (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
-
-multiclass SelectBits
-{
- def v16i8: SELBVecInst<v16i8>;
- def v8i16: SELBVecInst<v8i16>;
- def v4i32: SELBVecInst<v4i32>;
- def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
-
- def r128: SELBRegInst<GPRC>;
- def r64: SELBRegInst<R64C>;
- def r32: SELBRegInst<R32C>;
- def r16: SELBRegInst<R16C>;
- def r8: SELBRegInst<R8C>;
-
- def v16i8_cond: SELBVecCondInst<v16i8>;
- def v8i16_cond: SELBVecCondInst<v8i16>;
- def v4i32_cond: SELBVecCondInst<v4i32>;
- def v2i64_cond: SELBVecCondInst<v2i64>;
-
- def v16i8_vcond: SELBVecCondInst<v16i8>;
- def v8i16_vcond: SELBVecCondInst<v8i16>;
- def v4i32_vcond: SELBVecCondInst<v4i32>;
- def v2i64_vcond: SELBVecCondInst<v2i64>;
-
- def v4f32_cond:
- SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (v4f32 VECREG:$rT),
- (select (v4i32 VECREG:$rC),
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rA)))]>;
-
- // SELBr64_cond is defined in SPU64InstrInfo.td
- def r32_cond: SELBRegCondInst<R32C, R32C>;
- def f32_cond: SELBRegCondInst<R32C, R32FP>;
- def r16_cond: SELBRegCondInst<R16C, R16C>;
- def r8_cond: SELBRegCondInst<R8C, R8C>;
-}
-
-defm SELB : SelectBits;
-
-class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
- Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
- (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
-
-def : SPUselbPatVec<v16i8, SELBv16i8>;
-def : SPUselbPatVec<v8i16, SELBv8i16>;
-def : SPUselbPatVec<v4i32, SELBv4i32>;
-def : SPUselbPatVec<v2i64, SELBv2i64>;
-
-class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
- Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
- (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
-
-def : SPUselbPatReg<R8C, SELBr8>;
-def : SPUselbPatReg<R16C, SELBr16>;
-def : SPUselbPatReg<R32C, SELBr32>;
-def : SPUselbPatReg<R64C, SELBr64>;
-
-// EQV: Equivalence (1 for each same bit, otherwise 0)
-//
-// Note: There are a lot of ways to match this bit operator and these patterns
-// attempt to be as exhaustive as possible.
-
-class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
- IntegerOp, pattern>;
-
-class EQVVecInst<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
- (and (vnot (vectype VECREG:$rA)),
- (vnot (vectype VECREG:$rB)))))]>;
-
-class EQVRegInst<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
- (and (not rclass:$rA), (not rclass:$rB))))]>;
-
-class EQVVecPattern1<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
-
-class EQVRegPattern1<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
-
-class EQVVecPattern2<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
- (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
-
-class EQVRegPattern2<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT,
- (or (and rclass:$rA, rclass:$rB),
- (not (or rclass:$rA, rclass:$rB))))]>;
-
-class EQVVecPattern3<ValueType vectype>:
- EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
-
-class EQVRegPattern3<RegisterClass rclass>:
- EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
-
-multiclass BitEquivalence
-{
- def v16i8: EQVVecInst<v16i8>;
- def v8i16: EQVVecInst<v8i16>;
- def v4i32: EQVVecInst<v4i32>;
- def v2i64: EQVVecInst<v2i64>;
-
- def v16i8_1: EQVVecPattern1<v16i8>;
- def v8i16_1: EQVVecPattern1<v8i16>;
- def v4i32_1: EQVVecPattern1<v4i32>;
- def v2i64_1: EQVVecPattern1<v2i64>;
-
- def v16i8_2: EQVVecPattern2<v16i8>;
- def v8i16_2: EQVVecPattern2<v8i16>;
- def v4i32_2: EQVVecPattern2<v4i32>;
- def v2i64_2: EQVVecPattern2<v2i64>;
-
- def v16i8_3: EQVVecPattern3<v16i8>;
- def v8i16_3: EQVVecPattern3<v8i16>;
- def v4i32_3: EQVVecPattern3<v4i32>;
- def v2i64_3: EQVVecPattern3<v2i64>;
-
- def r128: EQVRegInst<GPRC>;
- def r64: EQVRegInst<R64C>;
- def r32: EQVRegInst<R32C>;
- def r16: EQVRegInst<R16C>;
- def r8: EQVRegInst<R8C>;
-
- def r128_1: EQVRegPattern1<GPRC>;
- def r64_1: EQVRegPattern1<R64C>;
- def r32_1: EQVRegPattern1<R32C>;
- def r16_1: EQVRegPattern1<R16C>;
- def r8_1: EQVRegPattern1<R8C>;
-
- def r128_2: EQVRegPattern2<GPRC>;
- def r64_2: EQVRegPattern2<R64C>;
- def r32_2: EQVRegPattern2<R32C>;
- def r16_2: EQVRegPattern2<R16C>;
- def r8_2: EQVRegPattern2<R8C>;
-
- def r128_3: EQVRegPattern3<GPRC>;
- def r64_3: EQVRegPattern3<R64C>;
- def r32_3: EQVRegPattern3<R32C>;
- def r16_3: EQVRegPattern3<R16C>;
- def r8_3: EQVRegPattern3<R8C>;
-}
-
-defm EQV: BitEquivalence;
-
-//===----------------------------------------------------------------------===//
-// Vector shuffle...
-//===----------------------------------------------------------------------===//
-// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
-// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
-// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with
-// the SPUISD::SHUFB opcode.
-//===----------------------------------------------------------------------===//
-
-class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
- RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
- ShuffleOp, pattern>;
-
-class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
- SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- [(set (resultvec VECREG:$rT),
- (SPUshuffle (resultvec VECREG:$rA),
- (resultvec VECREG:$rB),
- (maskvec VECREG:$rC)))]>;
-
-class SHUFBGPRCInst:
- SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
- [/* no pattern */]>;
-
-multiclass ShuffleBytes
-{
- def v16i8 : SHUFBVecInst<v16i8, v16i8>;
- def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
- def v8i16 : SHUFBVecInst<v8i16, v16i8>;
- def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
- def v4i32 : SHUFBVecInst<v4i32, v16i8>;
- def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
- def v2i64 : SHUFBVecInst<v2i64, v16i8>;
- def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
-
- def v4f32 : SHUFBVecInst<v4f32, v16i8>;
- def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
-
- def v2f64 : SHUFBVecInst<v2f64, v16i8>;
- def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
-
- def gprc : SHUFBGPRCInst;
-}
-
-defm SHUFB : ShuffleBytes;
-
-//===----------------------------------------------------------------------===//
-// Shift and rotate group:
-//===----------------------------------------------------------------------===//
-
-class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class SHLHVecInst<ValueType vectype>:
- SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass ShiftLeftHalfword
-{
- def v8i16: SHLHVecInst<v8i16>;
- def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
- def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
-}
-
-defm SHLH : ShiftLeftHalfword;
-
-//===----------------------------------------------------------------------===//
-
-class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class SHLHIVecInst<ValueType vectype>:
- SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
-
-multiclass ShiftLeftHalfwordImm
-{
- def v8i16: SHLHIVecInst<v8i16>;
- def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
- [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
-}
-
-defm SHLHI : ShiftLeftHalfwordImm;
-
-def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
- (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
-
-def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
- (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
-
-//===----------------------------------------------------------------------===//
-
-class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-multiclass ShiftLeftWord
-{
- def v4i32:
- SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
- def r32:
- SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
-}
-
-defm SHL: ShiftLeftWord;
-
-//===----------------------------------------------------------------------===//
-
-class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-multiclass ShiftLeftWordImm
-{
- def v4i32:
- SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
-
- def r32:
- SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
- [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm SHLI : ShiftLeftWordImm;
-
-//===----------------------------------------------------------------------===//
-// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
-// register) to the left. Vector form is here to ensure type correctness.
-//
-// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
-// of 7 bits is actually possible.
-//
-// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
-// to shift i64 and i128. SHLQBI is the residual left over after shifting by
-// bytes with SHLQBY.
-
-class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBIVecInst<ValueType vectype>:
- SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
-
-class SHLQBIRegInst<RegisterClass rclass>:
- SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass ShiftLeftQuadByBits
-{
- def v16i8: SHLQBIVecInst<v16i8>;
- def v8i16: SHLQBIVecInst<v8i16>;
- def v4i32: SHLQBIVecInst<v4i32>;
- def v4f32: SHLQBIVecInst<v4f32>;
- def v2i64: SHLQBIVecInst<v2i64>;
- def v2f64: SHLQBIVecInst<v2f64>;
-
- def r128: SHLQBIRegInst<GPRC>;
-}
-
-defm SHLQBI : ShiftLeftQuadByBits;
-
-// See note above on SHLQBI. In this case, the predicate actually does then
-// enforcement, whereas with SHLQBI, we have to "take it on faith."
-class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class SHLQBIIVecInst<ValueType vectype>:
- SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
-
-multiclass ShiftLeftQuadByBitsImm
-{
- def v16i8 : SHLQBIIVecInst<v16i8>;
- def v8i16 : SHLQBIIVecInst<v8i16>;
- def v4i32 : SHLQBIIVecInst<v4i32>;
- def v4f32 : SHLQBIIVecInst<v4f32>;
- def v2i64 : SHLQBIIVecInst<v2i64>;
- def v2f64 : SHLQBIIVecInst<v2f64>;
-}
-
-defm SHLQBII : ShiftLeftQuadByBitsImm;
-
-// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
-// not by bits. See notes above on SHLQBI.
-
-class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBYVecInst<ValueType vectype>:
- SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
-
-multiclass ShiftLeftQuadBytes
-{
- def v16i8: SHLQBYVecInst<v16i8>;
- def v8i16: SHLQBYVecInst<v8i16>;
- def v4i32: SHLQBYVecInst<v4i32>;
- def v4f32: SHLQBYVecInst<v4f32>;
- def v2i64: SHLQBYVecInst<v2i64>;
- def v2f64: SHLQBYVecInst<v2f64>;
- def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
- [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
-}
-
-defm SHLQBY: ShiftLeftQuadBytes;
-
-class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class SHLQBYIVecInst<ValueType vectype>:
- SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
- [(set (vectype VECREG:$rT),
- (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
-
-multiclass ShiftLeftQuadBytesImm
-{
- def v16i8: SHLQBYIVecInst<v16i8>;
- def v8i16: SHLQBYIVecInst<v8i16>;
- def v4i32: SHLQBYIVecInst<v4i32>;
- def v4f32: SHLQBYIVecInst<v4f32>;
- def v2i64: SHLQBYIVecInst<v2i64>;
- def v2f64: SHLQBYIVecInst<v2f64>;
- def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
- [(set GPRC:$rT,
- (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm SHLQBYI : ShiftLeftQuadBytesImm;
-
-class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class SHLQBYBIVecInst<ValueType vectype>:
- SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-class SHLQBYBIRegInst<RegisterClass rclass>:
- SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass ShiftLeftQuadBytesBitCount
-{
- def v16i8: SHLQBYBIVecInst<v16i8>;
- def v8i16: SHLQBYBIVecInst<v8i16>;
- def v4i32: SHLQBYBIVecInst<v4i32>;
- def v4f32: SHLQBYBIVecInst<v4f32>;
- def v2i64: SHLQBYBIVecInst<v2i64>;
- def v2f64: SHLQBYBIVecInst<v2f64>;
-
- def r128: SHLQBYBIRegInst<GPRC>;
-}
-
-defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate halfword:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class ROTHVecInst<ValueType vectype>:
- ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
-
-class ROTHRegInst<RegisterClass rclass>:
- ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
- [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
-
-multiclass RotateLeftHalfword
-{
- def v8i16: ROTHVecInst<v8i16>;
- def r16: ROTHRegInst<R16C>;
-}
-
-defm ROTH: RotateLeftHalfword;
-
-def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate halfword, immediate:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTHIVecInst<ValueType vectype>:
- ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
-
-multiclass RotateLeftHalfwordImm
-{
- def v8i16: ROTHIVecInst<v8i16>;
- def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
- [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
- def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
- [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
-}
-
-defm ROTHI: RotateLeftHalfwordImm;
-
-def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
- (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate word:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-class ROTVecInst<ValueType vectype>:
- ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
-
-class ROTRegInst<RegisterClass rclass>:
- ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [(set rclass:$rT,
- (rotl rclass:$rA, R32C:$rB))]>;
-
-multiclass RotateLeftWord
-{
- def v4i32: ROTVecInst<v4i32>;
- def r32: ROTRegInst<R32C>;
-}
-
-defm ROT: RotateLeftWord;
-
-// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
-// 32-bit register
-def ROTr32_r16_anyext:
- ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
- [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
-
-def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
- (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
-
-def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
- (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
-
-def ROTr32_r8_anyext:
- ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
- [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
-
-def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
- (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
-
-def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
- (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate word, immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
- ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
-
-class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
- ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
-
-multiclass RotateLeftWordImm
-{
- def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
- def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
- def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
-
- def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
- def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
- def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
-}
-
-defm ROTI : RotateLeftWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad by byte (count)
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQBYGenInst<ValueType type, RegisterClass rc>:
- ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
- [(set (type rc:$rT),
- (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
-
-class ROTQBYVecInst<ValueType type>:
- ROTQBYGenInst<type, VECREG>;
-
-multiclass RotateQuadLeftByBytes
-{
- def v16i8: ROTQBYVecInst<v16i8>;
- def v8i16: ROTQBYVecInst<v8i16>;
- def v4i32: ROTQBYVecInst<v4i32>;
- def v4f32: ROTQBYVecInst<v4f32>;
- def v2i64: ROTQBYVecInst<v2i64>;
- def v2f64: ROTQBYVecInst<v2f64>;
- def i128: ROTQBYGenInst<i128, GPRC>;
-}
-
-defm ROTQBY: RotateQuadLeftByBytes;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad by byte (count), immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
- ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
- [(set (type rclass:$rT),
- (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
-
-class ROTQBYIVecInst<ValueType vectype>:
- ROTQBYIGenInst<vectype, VECREG>;
-
-multiclass RotateQuadByBytesImm
-{
- def v16i8: ROTQBYIVecInst<v16i8>;
- def v8i16: ROTQBYIVecInst<v8i16>;
- def v4i32: ROTQBYIVecInst<v4i32>;
- def v4f32: ROTQBYIVecInst<v4f32>;
- def v2i64: ROTQBYIVecInst<v2i64>;
- def vfi64: ROTQBYIVecInst<v2f64>;
- def i128: ROTQBYIGenInst<i128, GPRC>;
-}
-
-defm ROTQBYI: RotateQuadByBytesImm;
-
-// See ROTQBY note above.
-class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00110011100, OOL, IOL,
- "rotqbybi\t$rT, $rA, $shift",
- RotShiftQuad, pattern>;
-
-class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
- ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
-
-multiclass RotateQuadByBytesByBitshift {
- def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
- def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
- def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
- def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
-}
-
-defm ROTQBYBI : RotateQuadByBytesByBitshift;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// See ROTQBY note above.
-//
-// Assume that the user of this instruction knows to shift the rotate count
-// into bit 29
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQBIVecInst<ValueType vectype>:
- ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern yet */]>;
-
-class ROTQBIRegInst<RegisterClass rclass>:
- ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern yet */]>;
-
-multiclass RotateQuadByBitCount
-{
- def v16i8: ROTQBIVecInst<v16i8>;
- def v8i16: ROTQBIVecInst<v8i16>;
- def v4i32: ROTQBIVecInst<v4i32>;
- def v2i64: ROTQBIVecInst<v2i64>;
-
- def r128: ROTQBIRegInst<GPRC>;
- def r64: ROTQBIRegInst<R64C>;
-}
-
-defm ROTQBI: RotateQuadByBitCount;
-
-class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
- [/* no pattern yet */]>;
-
-class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern yet */]>;
-
-multiclass RotateQuadByBitCountImm
-{
- def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
- def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
- def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
- def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
-
- def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
- def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
-}
-
-defm ROTQBII : RotateQuadByBitCountImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTHM v8i16 form:
-// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
-// so this only matches a synthetically generated/lowered code
-// fragment.
-// NOTE(2): $rB must be negated before the right rotate!
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-def ROTHMv8i16:
- ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
-
-// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
-// Note: This instruction doesn't match a pattern because rB must be negated
-// for the instruction to work. Thus, the pattern below the instruction!
-
-def ROTHMr16:
- ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- [/* see patterns below - $rB must be negated! */]>;
-
-def : Pat<(srl R16C:$rA, R32C:$rB),
- (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(srl R16C:$rA, R16C:$rB),
- (ROTHMr16 R16C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(srl R16C:$rA, R8C:$rB),
- (ROTHMr16 R16C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
-
-// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
-// that the immediate can be complemented, so that the user doesn't have to
-// worry about it.
-
-class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-def ROTHMIv8i16:
- ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- [/* no pattern */]>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
-
-def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
- (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
-
-def ROTHMIr16:
- ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
- [/* no pattern */]>;
-
-def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, uimm7:$val)>;
-
-def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
- (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-// ROTM v4i32 form: See the ROTHM v8i16 comments.
-class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
- RotShiftVec, pattern>;
-
-def ROTMv4i32:
- ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
-
-def ROTMr32:
- ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(srl R32C:$rA, R32C:$rB),
- (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(srl R32C:$rA, R16C:$rB),
- (ROTMr32 R32C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(srl R32C:$rA, R8C:$rB),
- (ROTMr32 R32C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-// ROTMI v4i32 form: See the comment for ROTHM v8i16.
-def ROTMIv4i32:
- RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotShiftVec,
- [(set (v4i32 VECREG:$rT),
- (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
- (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
- (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
-
-// ROTMI r32 form: know how to complement the immediate value.
-def ROTMIr32:
- RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotShiftVec,
- [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
-
-def : Pat<(srl R32C:$rA, (i16 imm:$val)),
- (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(srl R32C:$rA, (i8 imm:$val)),
- (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTQMBY: This is a vector form merely so that when used in an
-// instruction pattern, type checking will succeed. This instruction assumes
-// that the user knew to negate $rB.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBYVecInst<ValueType vectype>:
- ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern, $rB must be negated */]>;
-
-class ROTQMBYRegInst<RegisterClass rclass>:
- ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateQuadBytes
-{
- def v16i8: ROTQMBYVecInst<v16i8>;
- def v8i16: ROTQMBYVecInst<v8i16>;
- def v4i32: ROTQMBYVecInst<v4i32>;
- def v2i64: ROTQMBYVecInst<v2i64>;
-
- def r128: ROTQMBYRegInst<GPRC>;
- def r64: ROTQMBYRegInst<R64C>;
-}
-
-defm ROTQMBY : RotateQuadBytes;
-
-def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
- (ROTQMBYr128 GPRC:$rA,
- (SFIr32 R32C:$rB, 0))>;
-
-class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQMBYIVecInst<ValueType vectype>:
- ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
- PatLeaf pred>:
- ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern */]>;
-
-// 128-bit zero extension form:
-class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
- ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
- [/* no pattern */]>;
-
-multiclass RotateQuadBytesImm
-{
- def v16i8: ROTQMBYIVecInst<v16i8>;
- def v8i16: ROTQMBYIVecInst<v8i16>;
- def v4i32: ROTQMBYIVecInst<v4i32>;
- def v2i64: ROTQMBYIVecInst<v2i64>;
-
- def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
- def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
-
- def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
- def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
- def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
- def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
-}
-
-defm ROTQMBYI : RotateQuadBytesImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate right and mask by bit count
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBYBIVecInst<ValueType vectype>:
- ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern, */]>;
-
-multiclass RotateMaskQuadByBitCount
-{
- def v16i8: ROTQMBYBIVecInst<v16i8>;
- def v8i16: ROTQMBYBIVecInst<v8i16>;
- def v4i32: ROTQMBYBIVecInst<v4i32>;
- def v2i64: ROTQMBYBIVecInst<v2i64>;
- def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
- [/*no pattern*/]>;
-}
-
-defm ROTQMBYBI: RotateMaskQuadByBitCount;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad and mask by bits
-// Note that the rotate amount has to be negated
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
- RotShiftQuad, pattern>;
-
-class ROTQMBIVecInst<ValueType vectype>:
- ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-class ROTQMBIRegInst<RegisterClass rclass>:
- ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBits
-{
- def v16i8: ROTQMBIVecInst<v16i8>;
- def v8i16: ROTQMBIVecInst<v8i16>;
- def v4i32: ROTQMBIVecInst<v4i32>;
- def v2i64: ROTQMBIVecInst<v2i64>;
-
- def r128: ROTQMBIRegInst<GPRC>;
- def r64: ROTQMBIRegInst<R64C>;
-}
-
-defm ROTQMBI: RotateMaskQuadByBits;
-
-def : Pat<(srl GPRC:$rA, R32C:$rB),
- (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
- (SFIr32 R32C:$rB, 0)),
- (SFIr32 R32C:$rB, 0))>;
-
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Rotate quad and mask by bits, immediate
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
- RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
- RotShiftQuad, pattern>;
-
-class ROTQMBIIVecInst<ValueType vectype>:
- ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-class ROTQMBIIRegInst<RegisterClass rclass>:
- ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
- [/* no pattern */]>;
-
-multiclass RotateMaskQuadByBitsImm
-{
- def v16i8: ROTQMBIIVecInst<v16i8>;
- def v8i16: ROTQMBIIVecInst<v8i16>;
- def v4i32: ROTQMBIIVecInst<v4i32>;
- def v2i64: ROTQMBIIVecInst<v2i64>;
-
- def r128: ROTQMBIIRegInst<GPRC>;
- def r64: ROTQMBIIRegInst<R64C>;
-}
-
-defm ROTQMBII: RotateMaskQuadByBitsImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def ROTMAHv8i16:
- RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "rotmah\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
-
-def ROTMAHr16:
- RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(sra R16C:$rA, R32C:$rB),
- (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(sra R16C:$rA, R16C:$rB),
- (ROTMAHr16 R16C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(sra R16C:$rA, R8C:$rB),
- (ROTMAHr16 R16C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-def ROTMAHIv8i16:
- RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- "rotmahi\t$rT, $rA, $val", RotShiftVec,
- [(set (v8i16 VECREG:$rT),
- (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
- (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
- (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
-
-def ROTMAHIr16:
- RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
- "rotmahi\t$rT, $rA, $val", RotShiftVec,
- [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
-
-def : Pat<(sra R16C:$rA, (i32 imm:$val)),
- (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def : Pat<(sra R16C:$rA, (i8 imm:$val)),
- (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
-
-def ROTMAv4i32:
- RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "rotma\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
-
-def ROTMAr32:
- RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotShiftVec,
- [/* see patterns below - $rB must be negated */]>;
-
-def : Pat<(sra R32C:$rA, R32C:$rB),
- (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(sra R32C:$rA, R16C:$rB),
- (ROTMAr32 R32C:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(sra R32C:$rA, R8C:$rB),
- (ROTMAr32 R32C:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
-
-class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011110000, OOL, IOL,
- "rotmai\t$rT, $rA, $val",
- RotShiftVec, pattern>;
-
-class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
- ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
- [(set (vectype VECREG:$rT),
- (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
-
-class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
- ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
- [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
-
-multiclass RotateMaskAlgebraicImm {
- def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
- def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
- def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
- def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
-}
-
-defm ROTMAI : RotateMaskAlgebraicImm;
-
-//===----------------------------------------------------------------------===//
-// Branch and conditionals:
-//===----------------------------------------------------------------------===//
-
-let isTerminator = 1, isBarrier = 1 in {
- // Halt If Equal (r32 preferred slot only, no vector form)
- def HEQr32:
- RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
- "heq\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HEQIr32 :
- RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
- "heqi\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-
- // HGT/HGTI: These instructions use signed arithmetic for the comparison,
- // contrasting with HLGT/HLGTI, which use unsigned comparison:
- def HGTr32:
- RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
- "hgt\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HGTIr32:
- RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
- "hgti\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-
- def HLGTr32:
- RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
- "hlgt\t$rA, $rB", BranchResolv,
- [/* no pattern to match */]>;
-
- def HLGTIr32:
- RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
- "hlgti\t$rA, $val", BranchResolv,
- [/* no pattern to match */]>;
-}
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// Comparison operators for i8, i16 and i32:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualByte
-{
- def v16i8 :
- CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r8 :
- CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
-}
-
-class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualByteImm
-{
- def v16i8 :
- CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
-}
-
-class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualHalfword
-{
- def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
-}
-
-class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualHalfwordImm
-{
- def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (seteq (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpEqualWord
-{
- def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
-}
-
-class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpEqualWordImm
-{
- def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (seteq (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
-}
-
-class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrByte
-{
- def v16i8 :
- CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r8 :
- CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
-}
-
-class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrByteImm
-{
- def v16i8 :
- CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
-}
-
-class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrHalfword
-{
- def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
-}
-
-class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrHalfwordImm
-{
- def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (setgt (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpGtrWord
-{
- def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
-}
-
-class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpGtrWordImm
-{
- def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
-
- // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
- def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
- [/* no pattern */]>;
-}
-
-class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrByte
-{
- def v16i8 :
- CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r8 :
- CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
- [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
-}
-
-class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrByteImm
-{
- def v16i8 :
- CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
- [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
- v16i8SExt8Imm:$val))]>;
- def r8:
- CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
- [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
-}
-
-class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrHalfword
-{
- def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
- (v8i16 VECREG:$rB)))]>;
-
- def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
- [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
-}
-
-class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrHalfwordImm
-{
- def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v8i16 VECREG:$rT),
- (setugt (v8i16 VECREG:$rA),
- (v8i16 v8i16SExt10Imm:$val)))]>;
- def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
- [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
-}
-
-class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
- RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
- ByteOp, pattern>;
-
-multiclass CmpLGtrWord
-{
- def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v4i32 VECREG:$rT),
- (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
-
- def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
-}
-
-class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
- RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
- ByteOp, pattern>;
-
-multiclass CmpLGtrWordImm
-{
- def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
- [(set (v4i32 VECREG:$rT),
- (setugt (v4i32 VECREG:$rA),
- (v4i32 v4i32SExt16Imm:$val)))]>;
-
- def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
- [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
-}
-
-defm CEQB : CmpEqualByte;
-defm CEQBI : CmpEqualByteImm;
-defm CEQH : CmpEqualHalfword;
-defm CEQHI : CmpEqualHalfwordImm;
-defm CEQ : CmpEqualWord;
-defm CEQI : CmpEqualWordImm;
-defm CGTB : CmpGtrByte;
-defm CGTBI : CmpGtrByteImm;
-defm CGTH : CmpGtrHalfword;
-defm CGTHI : CmpGtrHalfwordImm;
-defm CGT : CmpGtrWord;
-defm CGTI : CmpGtrWordImm;
-defm CLGTB : CmpLGtrByte;
-defm CLGTBI : CmpLGtrByteImm;
-defm CLGTH : CmpLGtrHalfword;
-defm CLGTHI : CmpLGtrHalfwordImm;
-defm CLGT : CmpLGtrWord;
-defm CLGTI : CmpLGtrWordImm;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// For SETCC primitives not supported above (setlt, setle, setge, etc.)
-// define a pattern to generate the right code, as a binary operator
-// (in a manner of speaking.)
-//
-// Notes:
-// 1. This only matches the setcc set of conditionals. Special pattern
-// matching is used for select conditionals.
-//
-// 2. The "DAG" versions of these classes is almost exclusively used for
-// i64 comparisons. See the tblgen fundamentals documentation for what
-// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
-// class for where ResultInstrs originates.
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr xorinst, SPUInstr cmpare>:
- Pat<(cond rclass:$rA, rclass:$rB),
- (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
-
-class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
- PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
- Pat<(cond rclass:$rA, (inttype immpred:$imm)),
- (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
-
-def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
-def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
-
-def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
-def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
-
-def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
-def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
-
-class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
- SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
- Pat<(cond rclass:$rA, rclass:$rB),
- (binop (cmpOp1 rclass:$rA, rclass:$rB),
- (cmpOp2 rclass:$rA, rclass:$rB))>;
-
-class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
- ValueType immtype,
- SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
- Pat<(cond rclass:$rA, (immtype immpred:$imm)),
- (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
- (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
-
-def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
-def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
-def : Pat<(setle R8C:$rA, R8C:$rB),
- (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
-def : Pat<(setle R8C:$rA, immU8:$imm),
- (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
-
-def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
- ORr16, CGTHIr16, CEQHIr16>;
-def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
-def : Pat<(setle R16C:$rA, R16C:$rB),
- (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
-def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
- (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
-
-def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
-def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
- ORr32, CGTIr32, CEQIr32>;
-def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
-def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
-def : Pat<(setle R32C:$rA, R32C:$rB),
- (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
-def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
- (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
-
-def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
-def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
-def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
-def : Pat<(setule R8C:$rA, R8C:$rB),
- (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
-def : Pat<(setule R8C:$rA, immU8:$imm),
- (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
-
-def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
- ORr16, CLGTHIr16, CEQHIr16>;
-def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
-def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
- CLGTHIr16, CEQHIr16>;
-def : Pat<(setule R16C:$rA, R16C:$rB),
- (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
-def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
- (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
-
-def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
-def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
- ORr32, CLGTIr32, CEQIr32>;
-def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
-def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
-def : Pat<(setule R32C:$rA, R32C:$rB),
- (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
-def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
- (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// select conditional patterns:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr selinstr, SPUInstr cmpare>:
- Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rTrue, rclass:$rFalse,
- (cmpare rclass:$rA, rclass:$rB))>;
-
-class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
- PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
- Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rTrue, rclass:$rFalse,
- (cmpare rclass:$rA, immpred:$imm))>;
-
-def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
-def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
-def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
-def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBr8>;
-def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
-def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
-
-def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
-def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
-def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
-def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
-def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
-def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
-
-def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
-def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
-def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
-def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
-def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
-def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
-
-class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
- SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
- SPUInstr cmpOp2>:
- Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rFalse, rclass:$rTrue,
- (binop (cmpOp1 rclass:$rA, rclass:$rB),
- (cmpOp2 rclass:$rA, rclass:$rB)))>;
-
-class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
- ValueType inttype,
- SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
- SPUInstr cmpOp2>:
- Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
- rclass:$rTrue, rclass:$rFalse),
- (selinstr rclass:$rFalse, rclass:$rTrue,
- (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
- (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
-
-def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
-def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
- SELBr8, ORr8, CGTBIr8, CEQBIr8>;
-
-def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
-def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
- SELBr16, ORr16, CGTHIr16, CEQHIr16>;
-
-def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
-def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
- SELBr32, ORr32, CGTIr32, CEQIr32>;
-
-def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
-def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
- SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
-
-def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
-def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
- SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
-
-def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
-def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
- SELBr32, ORr32, CLGTIr32, CEQIr32>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-let isCall = 1,
- // All calls clobber the non-callee-saved registers:
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
- R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
- R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
- R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
- R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
- R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
- R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
- R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
- // All of these instructions use $lr (aka $0)
- Uses = [R0] in {
- // Branch relative and set link: Used if we actually know that the target
- // is within [-32768, 32767] bytes of the target
- def BRSL:
- BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func),
- "brsl\t$$lr, $func",
- [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
-
- // Branch absolute and set link: Used if we actually know that the target
- // is an absolute address
- def BRASL:
- BranchSetLink<0b011001100, (outs), (ins calltarget:$func),
- "brasl\t$$lr, $func",
- [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
-
- // Branch indirect and set link if external data. These instructions are not
- // actually generated, matched by an intrinsic:
- def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
- def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
-
- // Branch indirect and set link. This is the "X-form" address version of a
- // function call
- def BISL:
- BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
-}
-
-// Support calls to external symbols:
-def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
- (BRSL texternalsym:$func)>;
-
-def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
- (BRASL texternalsym:$func)>;
-
-// Unconditional branches:
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
- let isBarrier = 1 in {
- def BR :
- UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
- "br\t$dest",
- [(br bb:$dest)]>;
-
- // Unconditional, absolute address branch
- def BRA:
- UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
- "bra\t$dest",
- [/* no pattern */]>;
-
- // Indirect branch
- let isIndirectBranch = 1 in {
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
- }
- }
-
- // Conditional branches:
- class BRNZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
- BranchResolv, pattern>;
-
- class BRNZRegInst<RegisterClass rclass>:
- BRNZInst<(ins rclass:$rCond, brtarget:$dest),
- [(brcond rclass:$rCond, bb:$dest)]>;
-
- class BRNZVecInst<ValueType vectype>:
- BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
- [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
-
- multiclass BranchNotZero {
- def v4i32 : BRNZVecInst<v4i32>;
- def r32 : BRNZRegInst<R32C>;
- }
-
- defm BRNZ : BranchNotZero;
-
- class BRZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
- BranchResolv, pattern>;
-
- class BRZRegInst<RegisterClass rclass>:
- BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
-
- class BRZVecInst<ValueType vectype>:
- BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchZero {
- def v4i32: BRZVecInst<v4i32>;
- def r32: BRZRegInst<R32C>;
- }
-
- defm BRZ: BranchZero;
-
- // Note: LLVM doesn't do branch conditional, indirect. Otherwise these would
- // be useful:
- /*
- class BINZInst<dag IOL, list<dag> pattern>:
- BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
-
- class BINZRegInst<RegisterClass rclass>:
- BINZInst<(ins rclass:$rA, brtarget:$dest),
- [(brcond rclass:$rA, R32C:$dest)]>;
-
- class BINZVecInst<ValueType vectype>:
- BINZInst<(ins VECREG:$rA, R32C:$dest),
- [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
-
- multiclass BranchNotZeroIndirect {
- def v4i32: BINZVecInst<v4i32>;
- def r32: BINZRegInst<R32C>;
- }
-
- defm BINZ: BranchNotZeroIndirect;
-
- class BIZInst<dag IOL, list<dag> pattern>:
- BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
-
- class BIZRegInst<RegisterClass rclass>:
- BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
-
- class BIZVecInst<ValueType vectype>:
- BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
-
- multiclass BranchZeroIndirect {
- def v4i32: BIZVecInst<v4i32>;
- def r32: BIZRegInst<R32C>;
- }
-
- defm BIZ: BranchZeroIndirect;
- */
-
- class BRHNZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
- pattern>;
-
- class BRHNZRegInst<RegisterClass rclass>:
- BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
- [(brcond rclass:$rCond, bb:$dest)]>;
-
- class BRHNZVecInst<ValueType vectype>:
- BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchNotZeroHalfword {
- def v8i16: BRHNZVecInst<v8i16>;
- def r16: BRHNZRegInst<R16C>;
- }
-
- defm BRHNZ: BranchNotZeroHalfword;
-
- class BRHZInst<dag IOL, list<dag> pattern>:
- RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
- pattern>;
-
- class BRHZRegInst<RegisterClass rclass>:
- BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
-
- class BRHZVecInst<ValueType vectype>:
- BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
-
- multiclass BranchZeroHalfword {
- def v8i16: BRHZVecInst<v8i16>;
- def r16: BRHZRegInst<R16C>;
- }
-
- defm BRHZ: BranchZeroHalfword;
-}
-
-//===----------------------------------------------------------------------===//
-// setcc and brcond patterns:
-//===----------------------------------------------------------------------===//
-
-def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
- (BRHZr16 R16C:$rA, bb:$dest)>;
-def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
- (BRHNZr16 R16C:$rA, bb:$dest)>;
-
-def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
- (BRZr32 R32C:$rA, bb:$dest)>;
-def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
- (BRNZr32 R32C:$rA, bb:$dest)>;
-
-multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
-defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
-
-multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
-defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
-
-multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
- SPUInstr orinst32, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
- (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
- bb:$dest)>;
-
- def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB),
- (CEQHr16 R16C:$rA, R16:$rB)),
- bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
- (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
- bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
- (CEQr32 R32C:$rA, R32C:$rB)),
- bb:$dest)>;
-}
-
-defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
-defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
-
-multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
-{
- def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
-
- def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
-}
-
-defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
-defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
-
-multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
- SPUInstr orinst32, SPUInstr brinst32>
-{
- def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
- (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
- (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
- bb:$dest)>;
-
- def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
- (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB),
- (CEQHr16 R16C:$rA, R16:$rB)),
- bb:$dest)>;
-
- def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
- (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
- (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
- bb:$dest)>;
-
- def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
- (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
- (CEQr32 R32C:$rA, R32C:$rB)),
- bb:$dest)>;
-}
-
-defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
-defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
-
-let isTerminator = 1, isBarrier = 1 in {
- let isReturn = 1 in {
- def RET:
- RETForm<"bi\t$$lr", [(retflag)]>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Single precision floating point instructions
-//===----------------------------------------------------------------------===//
-
-class FAInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
- SPrecFP, pattern>;
-
-class FAVecInst<ValueType vectype>:
- FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass SFPAdd
-{
- def v4f32: FAVecInst<v4f32>;
- def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FA : SFPAdd;
-
-class FSInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
- SPrecFP, pattern>;
-
-class FSVecInst<ValueType vectype>:
- FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT),
- (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
-
-multiclass SFPSub
-{
- def v4f32: FSVecInst<v4f32>;
- def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FS : SFPSub;
-
-class FMInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01100011010, OOL, IOL,
- "fm\t$rT, $rA, $rB", SPrecFP,
- pattern>;
-
-class FMVecInst<ValueType type>:
- FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (type VECREG:$rT),
- (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
-
-multiclass SFPMul
-{
- def v4f32: FMVecInst<v4f32>;
- def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-}
-
-defm FM : SFPMul;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Mulitply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-// = - (a * b - c)
-// = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
-
-
-
-
-// Floating point reciprocal estimate
-
-class FRESTInst<dag OOL, dag IOL>:
- RRForm_1<0b00110111000, OOL, IOL,
- "frest\t$rT, $rA", SPrecFP,
- [/* no pattern */]>;
-
-def FRESTv4f32 :
- FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
-
-def FRESTf32 :
- FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
-
-// Floating point interpolate (used in conjunction with reciprocal estimate)
-def FIv4f32 :
- RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fi\t$rT, $rA, $rB", SPrecFP,
- [/* no pattern */]>;
-
-def FIf32 :
- RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fi\t$rT, $rA, $rB", SPrecFP,
- [/* no pattern */]>;
-
-//--------------------------------------------------------------------------
-// Basic single precision floating point comparisons:
-//
-// Note: There is no support on SPU for single precision NaN. Consequently,
-// ordered and unordered comparisons are the same.
-//--------------------------------------------------------------------------
-
-def FCEQf32 :
- RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fceq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
-
-def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
- (FCEQf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCMEQf32 :
- RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcmeq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-
-def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
- (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCGTf32 :
- RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
-
-def : Pat<(setogt R32FP:$rA, R32FP:$rB),
- (FCGTf32 R32FP:$rA, R32FP:$rB)>;
-
-def FCMGTf32 :
- RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fcmgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-
-def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
- (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
-
-//--------------------------------------------------------------------------
-// Single precision floating point comparisons and SETCC equivalents:
-//--------------------------------------------------------------------------
-
-def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
-def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
-
-def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
-def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
-
-def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
-def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
-
-def : Pat<(setule R32FP:$rA, R32FP:$rB),
- (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
-def : Pat<(setole R32FP:$rA, R32FP:$rB),
- (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
-
-// FP Status and Control Register Write
-// Why isn't rT a don't care in the ISA?
-// Should we create a special RRForm_3 for this guy and zero out the rT?
-def FSCRWf32 :
- RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
- "fscrwr\t$rA", SPrecFP,
- [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
-
-// FP Status and Control Register Read
-def FSCRRf32 :
- RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
- "fscrrd\t$rT", SPrecFP,
- [/* This instruction requires an intrinsic */]>;
-
-// llvm instruction space
-// How do these map onto cell instructions?
-// fdiv rA rB
-// frest rC rB # c = 1/b (both lines)
-// fi rC rB rC
-// fm rD rA rC # d = a * 1/b
-// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world
-// fma rB rB rC rD # b = b * c + d
-// = -(d *b -a) * c + d
-// = a * c - c ( a *b *c - a)
-
-// fcopysign (???)
-
-// Library calls:
-// These llvm instructions will actually map to library calls.
-// All that's needed, then, is to check that the appropriate library is
-// imported and do a brsl to the proper function name.
-// frem # fmod(x, y): x - (x/y) * y
-// (Note: fmod(double, double), fmodf(float,float)
-// fsqrt?
-// fsin?
-// fcos?
-// Unimplemented SPU instruction space
-// floating reciprocal absolute square root estimate (frsqest)
-
-// The following are probably just intrinsics
-// status and control register write
-// status and control register read
-
-//--------------------------------------
-// Floating Point Conversions
-// Signed conversions:
-def CSiFv4f32:
- CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "csflt\t$rT, $rA, 0", SPrecFP,
- [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
-
-// Convert signed integer to floating point
-def CSiFf32 :
- CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
- "csflt\t$rT, $rA, 0", SPrecFP,
- [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
-
-// Convert unsigned into to float
-def CUiFv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cuflt\t$rT, $rA, 0", SPrecFP,
- [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
-
-def CUiFf32 :
- CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
- "cuflt\t$rT, $rA, 0", SPrecFP,
- [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
-
-// Convert float to unsigned int
-// Assume that scale = 0
-
-def CFUiv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cfltu\t$rT, $rA, 0", SPrecFP,
- [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
-
-def CFUif32 :
- CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
- "cfltu\t$rT, $rA, 0", SPrecFP,
- [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
-
-// Convert float to signed int
-// Assume that scale = 0
-
-def CFSiv4f32 :
- CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "cflts\t$rT, $rA, 0", SPrecFP,
- [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
-
-def CFSif32 :
- CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
- "cflts\t$rT, $rA, 0", SPrecFP,
- [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
-
-//===----------------------------------------------------------------------==//
-// Single<->Double precision conversions
-//===----------------------------------------------------------------------==//
-
-// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
-// v4f32, output is v2f64--which goes in the name?)
-
-// Floating point extend single to double
-// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
-// operates on two double-word slots (i.e. 1st and 3rd fp numbers
-// are ignored).
-def FESDvec :
- RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
- "fesd\t$rT, $rA", SPrecFP,
- [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
-
-def FESDf32 :
- RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
- "fesd\t$rT, $rA", SPrecFP,
- [(set R64FP:$rT, (fextend R32FP:$rA))]>;
-
-// Floating point round double to single
-//def FRDSvec :
-// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
-// "frds\t$rT, $rA,", SPrecFP,
-// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
-
-def FRDSf64 :
- RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
- "frds\t$rT, $rA", SPrecFP,
- [(set R32FP:$rT, (fround R64FP:$rA))]>;
-
-//ToDo include anyextend?
-
-//===----------------------------------------------------------------------==//
-// Double precision floating point instructions
-//===----------------------------------------------------------------------==//
-def FAf64 :
- RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfa\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
-
-def FAv2f64 :
- RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfa\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FSf64 :
- RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfs\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
-
-def FSv2f64 :
- RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfs\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FMf64 :
- RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
- "dfm\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
-
-def FMv2f64:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "dfm\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
-
-def FMAf64:
- RRForm<0b00111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfma\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMAv2f64:
- RRForm<0b00111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMSf64 :
- RRForm<0b10111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfms\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FMSv2f64 :
- RRForm<0b10111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfms\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
- (v2f64 VECREG:$rC)))]>;
-
-// DFNMS: - (a * b - c)
-// - (a * b) + c => c - (a * b)
-
-class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
- DPrecFP, pattern>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-class DFNMSVecInst<list<dag> pattern>:
- DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- pattern>;
-
-class DFNMSRegInst<list<dag> pattern>:
- DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- pattern>;
-
-multiclass DFMultiplySubtract
-{
- def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB))))]>;
-
- def f64 : DFNMSRegInst<[(set R64FP:$rT,
- (fsub R64FP:$rC,
- (fmul R64FP:$rA, R64FP:$rB)))]>;
-}
-
-defm DFNMS : DFMultiplySubtract;
-
-// - (a * b + c)
-// - (a * b) - c
-def FNMAf64 :
- RRForm<0b11111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-def FNMAv2f64 :
- RRForm<0b11111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fneg (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB)))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
-
-//===----------------------------------------------------------------------==//
-// Floating point negation and absolute value
-//===----------------------------------------------------------------------==//
-
-def : Pat<(fneg (v4f32 VECREG:$rA)),
- (XORfnegvec (v4f32 VECREG:$rA),
- (v4f32 (ILHUv4i32 0x8000)))>;
-
-def : Pat<(fneg R32FP:$rA),
- (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
-
-// Floating point absolute value
-// Note: f64 fabs is custom-selected.
-
-def : Pat<(fabs R32FP:$rA),
- (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
-
-def : Pat<(fabs (v4f32 VECREG:$rA)),
- (ANDfabsvec (v4f32 VECREG:$rA),
- (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
-
-//===----------------------------------------------------------------------===//
-// Hint for branch instructions:
-//===----------------------------------------------------------------------===//
-def HBRA :
- HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
-
-//===----------------------------------------------------------------------===//
-// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong
-// in the odd pipeline)
-//===----------------------------------------------------------------------===//
-
-def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
- let Pattern = [];
-
- let Inst{0-10} = 0b10000000010;
- let Inst{11-17} = 0;
- let Inst{18-24} = 0;
- let Inst{25-31} = 0;
-}
-
-def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
- let Pattern = [];
-
- let Inst{0-10} = 0b10000000000;
- let Inst{11-17} = 0;
- let Inst{18-24} = 0;
- let Inst{25-31} = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Bit conversions (type conversions between vector/packed types)
-// NOTE: Promotions are handled using the XS* instructions.
-//===----------------------------------------------------------------------===//
-def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
-
-def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
-
-def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
-
-def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
-
-def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
-
-def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
- (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
-
-def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
- (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
- (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
- (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
- (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
- (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
- (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
-
-def : Pat<(i32 (bitconvert R32FP:$rA)),
- (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
-
-def : Pat<(f32 (bitconvert R32C:$rA)),
- (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
-
-def : Pat<(i64 (bitconvert R64FP:$rA)),
- (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
-
-def : Pat<(f64 (bitconvert R64C:$rA)),
- (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
-
-
-//===----------------------------------------------------------------------===//
-// Instruction patterns:
-//===----------------------------------------------------------------------===//
-
-// General 32-bit constants:
-def : Pat<(i32 imm:$imm),
- (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
-
-// Single precision float constants:
-def : Pat<(f32 fpimm:$imm),
- (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
-
-// General constant 32-bit vectors
-def : Pat<(v4i32 v4i32Imm:$imm),
- (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
- (LO16_vec v4i32Imm:$imm))>;
-
-// 8-bit constants
-def : Pat<(i8 imm:$imm),
- (ILHr8 imm:$imm)>;
-
-//===----------------------------------------------------------------------===//
-// Zero/Any/Sign extensions
-//===----------------------------------------------------------------------===//
-
-// sext 8->32: Sign extend bytes to words
-def : Pat<(sext_inreg R32C:$rSrc, i8),
- (XSHWr32 (XSBHr32 R32C:$rSrc))>;
-
-def : Pat<(i32 (sext R8C:$rSrc)),
- (XSHWr16 (XSBHr8 R8C:$rSrc))>;
-
-// sext 8->64: Sign extend bytes to double word
-def : Pat<(sext_inreg R64C:$rSrc, i8),
- (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
-
-def : Pat<(i64 (sext R8C:$rSrc)),
- (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
-
-// zext 8->16: Zero extend bytes to halfwords
-def : Pat<(i16 (zext R8C:$rSrc)),
- (ANDHIi8i16 R8C:$rSrc, 0xff)>;
-
-// zext 8->32: Zero extend bytes to words
-def : Pat<(i32 (zext R8C:$rSrc)),
- (ANDIi8i32 R8C:$rSrc, 0xff)>;
-
-// zext 8->64: Zero extend bytes to double words
-def : Pat<(i64 (zext R8C:$rSrc)),
- (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
- (COPY_TO_REGCLASS
- (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
- 0x4),
- (ILv4i32 0x0),
- (FSMBIv4i32 0x0f0f)), R64C)>;
-
-// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
-def : Pat<(i16 (anyext R8C:$rSrc)),
- (ORHIi8i16 R8C:$rSrc, 0)>;
-
-// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
-def : Pat<(i32 (anyext R8C:$rSrc)),
- (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
-
-// sext 16->64: Sign extend halfword to double word
-def : Pat<(sext_inreg R64C:$rSrc, i16),
- (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
-
-def : Pat<(sext R16C:$rSrc),
- (XSWDr64 (XSHWr16 R16C:$rSrc))>;
-
-// zext 16->32: Zero extend halfwords to words
-def : Pat<(i32 (zext R16C:$rSrc)),
- (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
- (ANDIi16i32 R16C:$rSrc, 0xf)>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
- (ANDIi16i32 R16C:$rSrc, 0xff)>;
-
-def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
- (ANDIi16i32 R16C:$rSrc, 0xfff)>;
-
-// anyext 16->32: Extend 16->32 bits, irrespective of sign
-def : Pat<(i32 (anyext R16C:$rSrc)),
- (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
-
-//===----------------------------------------------------------------------===//
-// Truncates:
-// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
-// above are custom lowered.
-//===----------------------------------------------------------------------===//
-
-def : Pat<(i8 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
-
-def : Pat<(i8 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
-
-def : Pat<(i8 (trunc R32C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
-
-def : Pat<(i8 (trunc R16C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R16C:$src, VECREG),
- (COPY_TO_REGCLASS R16C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
-
-def : Pat<(i16 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
-
-def : Pat<(i16 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
-
-def : Pat<(i16 (trunc R32C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv4i32_m32
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (COPY_TO_REGCLASS R32C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
-
-def : Pat<(i32 (trunc GPRC:$src)),
- (COPY_TO_REGCLASS
- (SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
-
-def : Pat<(i32 (trunc R64C:$src)),
- (COPY_TO_REGCLASS
- (SHUFBv2i64_m32
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (COPY_TO_REGCLASS R64C:$src, VECREG),
- (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
-
-//===----------------------------------------------------------------------===//
-// Address generation: SPU, like PPC, has to split addresses into high and
-// low parts in order to load them into a register.
-//===----------------------------------------------------------------------===//
-
-def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
-def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
-def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
-def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
- (SPUlo tglobaladdr:$in, 0)),
- (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
-
-def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
- (SPUlo texternalsym:$in, 0)),
- (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
- (SPUlo tjumptable:$in, 0)),
- (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
-
-def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
- (SPUlo tconstpool:$in, 0)),
- (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-
-def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
- (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
-
-def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
- (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
-
-def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
- (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
-
-def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
- (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-
-// Intrinsics:
-include "CellSDKIntrinsics.td"
-// Various math operator instruction sequences
-include "SPUMathInstr.td"
-// 64-bit "instructions"/support
-include "SPU64InstrInfo.td"
-// 128-bit "instructions"/support
-include "SPU128InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/CellSPU/SPUMachineFunction.cpp
deleted file mode 100644
index 3e948d071d..0000000000
--- a/lib/Target/CellSPU/SPUMachineFunction.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUMachineFunction.h"
-
-using namespace llvm;
-
-// Out-of-line definition of the virtual anchor() declared in SPUFunctionInfo.
-// The body is intentionally empty.
-// NOTE(review): presumably this follows the LLVM "anchor" idiom (giving the
-// class an out-of-line virtual so its vtable is emitted here) - confirm.
-void SPUFunctionInfo::anchor() { }
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
deleted file mode 100644
index 399684bb08..0000000000
--- a/lib/Target/CellSPU/SPUMachineFunction.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_MACHINE_FUNCTION_INFO_H
-#define SPU_MACHINE_FUNCTION_INFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// SPUFunctionInfo - Cell SPU target-specific information for each
-/// MachineFunction
-class SPUFunctionInfo : public MachineFunctionInfo {
-  virtual void anchor();
-
-  /// UsesLR - Indicates whether LR is used in the current function.
-  ///
-  bool UsesLR;
-
-  /// VarArgsFrameIndex - FrameIndex for start of varargs area.
-  int VarArgsFrameIndex;
-
-public:
-  /// Construct the per-function info with LR marked unused and the varargs
-  /// frame index set to 0. The MachineFunction argument is not read.
-  SPUFunctionInfo(MachineFunction& MF)
-  : UsesLR(false),
-    VarArgsFrameIndex(0)
-  {}
-
-  /// Record whether LR is used in the current function.
-  void setUsesLR(bool U) { UsesLR = U; }
-  /// Return the flag stored by setUsesLR(). Const-qualified so it can be
-  /// queried through a const reference, matching getVarArgsFrameIndex().
-  bool usesLR() const { return UsesLR; }
-
-  /// Return the frame index stored by setVarArgsFrameIndex().
-  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
-  /// Store the frame index for the start of the varargs area.
-  void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
-};
-
-} // end of namespace llvm
-
-
-#endif
-
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
deleted file mode 100644
index 9a5c3976af..0000000000
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===//
-//
-// Cell SPU math operations
-//
-// This target description file contains instruction sequences for various
-// math operations, such as vector multiplies, i32 multiply, etc., for the
-// SPU's i32, i16, i8 and corresponding vector types.
-//
-// Any resemblance to libsimdmath or the Cell SDK simdmath library is
-// purely and completely coincidental.
-//===----------------------------------------------------------------------===//
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v16i8 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
- (ORv4i32
- (ANDv4i32
- (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
- (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
- (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
- (FSMBIv8i16 0x2222)),
- (ILAv4i32 0x0000ffff)),
- (SHLIv4i32
- (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
- (ROTMAIv4i32_i32 VECREG:$rB, 16)),
- (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
- (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
- (FSMBIv8i16 0x2222)), 16))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v8i16 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
- (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
- (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
- (FSMBIv8i16 0xcccc))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v4i32, i32 multiply instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-def MPYv4i32:
- Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
- (Av4i32
- (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
- (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
- (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
-
-def MPYi32:
- Pat<(mul R32C:$rA, R32C:$rB),
- (Ar32
- (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
- (MPYHr32 R32C:$rB, R32C:$rA)),
- (MPYUr32 R32C:$rA, R32C:$rB))>;
-
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// f32, v4f32 divide instruction sequence:
-//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-// Reciprocal estimate and interpolation
-def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
-// Division estimate
-def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
-// Newton-Raphson iteration
-def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
- Interpf32.Fragment,
- DivEstf32.Fragment)>;
-// Epsilon addition
-def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
-
-def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
- (SELBf32_cond NRaphf32.Fragment,
- Epsilonf32.Fragment,
- (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
-
-// Reciprocal estimate and interpolation
-def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
-// Division estimate
-def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
-// Newton-Raphson iteration
-def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
- (v4f32 VECREG:$rB),
- (v4f32 VECREG:$rA)),
- Interpv4f32.Fragment,
- DivEstv4f32.Fragment)>;
-// Epsilon addition
-def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
-
-def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (SELBv4f32_cond NRaphv4f32.Fragment,
- Epsilonv4f32.Fragment,
- (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
- Epsilonv4f32.Fragment,
- (v4f32 VECREG:$rA)), -1))>;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
deleted file mode 100644
index a47e9ef016..0000000000
--- a/lib/Target/CellSPU/SPUNodes.td
+++ /dev/null
@@ -1,159 +0,0 @@
-//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Type profiles and SelectionDAG nodes used by CellSPU
-//
-//===----------------------------------------------------------------------===//
-
-// Type profile for a call sequence
-def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
-
-// SPU_GenControl: Type profile for generating control words for insertions
-def SPU_GenControl : SDTypeProfile<1, 1, []>;
-def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
-//===----------------------------------------------------------------------===//
-// Operand constraints:
-//===----------------------------------------------------------------------===//
-
-def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-// Operand type constraints for vector shuffle/permute operations
-def SDT_SPUshuffle : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-]>;
-
-// Vector binary operator type constraints (needs a further constraint to
-// ensure that operand 0 is a vector...):
-
-def SPUVecBinop: SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
-]>;
-
-// Trinary operators, e.g., addx, carry generate
-def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
-]>;
-
-// SELECT_MASK type constraints: There are several variations for the various
-// vector types (this avoids having to bit_convert all over the place.)
-def SPUselmask_type: SDTypeProfile<1, 1, [
- SDTCisInt<1>
-]>;
-
-// SELB type constraints:
-def SPUselb_type: SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
-
-// SPU Vector shift pseudo-instruction type constraints
-def SPUvecshift_type: SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
-
-// "marker" type for i64 operators that need a shuffle mask
-// (i.e., uses cg or bg or another instruction that needs to
-// use shufb to get things in the right place.)
-// Op0: The result
-// Op1, 2: LHS, RHS
-// Op3: Carry-generate shuffle mask
-
-def SPUmarker_type : SDTypeProfile<1, 3, [
- SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
-
-//===----------------------------------------------------------------------===//
-// Synthetic/pseudo-instructions
-//===----------------------------------------------------------------------===//
-
-// SPU CNTB:
-def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
-
-// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
-// SPUISelLowering.h):
-def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
-
-// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
-def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
-def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
-def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
-
-def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
-def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
-
-// Vector rotate left, bits shifted out of the left are rotated in on the right
-def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
- SPUvecshift_type, []>;
-
-// Vector rotate left by bytes, but the count is given in bits and the SPU
-// internally converts it to bytes (saves an instruction to mask off lower
-// three bits)
-def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
- SPUvecshift_type>;
-
-// Shift entire quad left by bytes/bits. Zeros are shifted in on the right
-// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128
-def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
-def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
-def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
-
-// SPU form select mask for bytes, immediate
-def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
-
-// SPU select bits instruction
-def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
-
-def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
-def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
-
-def SPU_vec_demote : SDTypeProfile<1, 1, []>;
-def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
-
-// Address high and low components, used for [r+r] type addressing
-def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
-def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
-
-// PC-relative address
-def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
-
-// A-Form local store addresses
-def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
-
-// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
-def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
-
-// i64 markers: supplies extra operands used to generate the i64 operator
-// instruction sequences
-def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
-def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
-def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
-
-//===----------------------------------------------------------------------===//
-// Constraints: (taken from PPCInstrInfo.td)
-//===----------------------------------------------------------------------===//
-
-class RegConstraint<string C> {
- string Constraints = C;
-}
-
-class NoEncode<string E> {
- string DisableEncoding = E;
-}
-
-//===----------------------------------------------------------------------===//
-// Return (flag isn't quite what it means: the operations are flagged so that
-// instruction scheduling doesn't disassociate them.)
-//===----------------------------------------------------------------------===//
-
-def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
deleted file mode 100644
index 7c58041e3b..0000000000
--- a/lib/Target/CellSPU/SPUNopFiller.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The final pass just before assembly printing. This pass is the last
-// checkpoint where nops and lnops are added to the instruction stream to
-// satisfy the dual issue requirements. The actual dual issue scheduling is
-// done (TODO: nowhere, currently)
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-  /// SPUNopFiller - MachineFunctionPass that walks every basic block of a
-  /// function and inserts ENOP/LNOP instructions, tracking whether the current
-  /// instruction slot is even or odd.
-  struct SPUNopFiller : public MachineFunctionPass {
-
-    TargetMachine &TM;
-    const TargetInstrInfo *TII;
-    const InstrItineraryData *IID;
-    bool isEvenPlace;  // the instruction slot (mem address) at hand is even/odd
-
-    static char ID;
-    /// Cache the instruction info and itinerary data of \p tm. isEvenPlace is
-    /// initialised here as well so that it never holds an indeterminate value,
-    /// even if runOnMachineBasicBlock() is reached before
-    /// runOnMachineFunction() has reset it.
-    SPUNopFiller(TargetMachine &tm)
-      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
-        IID(tm.getInstrItineraryData()), isEvenPlace(true)
-    {
-      DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
-    }
-
-    virtual const char *getPassName() const {
-      return "SPU nop/lnop Filler";
-    }
-
-    /// Insert nops/lnops into a single basic block (defined out of line).
-    void runOnMachineBasicBlock(MachineBasicBlock &MBB);
-
-    /// Process every basic block of \p F in order. Always returns true.
-    bool runOnMachineFunction(MachineFunction &F) {
-      isEvenPlace = true; //all functions get an .align 3 directive at start
-      for (MachineFunction::iterator FI = F.begin(), FE = F.end();
-           FI != FE; ++FI)
-        runOnMachineBasicBlock(*FI);
-      return true; //never-ever do any more modifications, just print it!
-    }
-
-    /// Classification of an instruction as returned by getOpPlacement().
-    typedef enum { none = 0, // no more instructions in this function / BB
-                   pseudo = 1, // this does not get executed
-                   even = 2,
-                   odd = 3 } SPUOpPlace;
-    /// Figure out if 'instr' is executed in the even or odd pipeline.
-    SPUOpPlace getOpPlacement( MachineInstr &instr );
-
-  };
-  char SPUNopFiller::ID = 0;
-
-}
-
-// Fill a BasicBlock to alignment.
-// In the assembly we align the functions to 'even' addresses, but
-// basic blocks have an implicit alignment. We hereby define
-// basic blocks to have the same, even, alignment.
-//
-// On entry isEvenPlace must be true (asserted); on exit it is true again
-// because the block is padded at its end if it would otherwise stop on an
-// odd slot.
-void SPUNopFiller::
-runOnMachineBasicBlock(MachineBasicBlock &MBB)
-{
- assert( isEvenPlace && "basic block start from odd address");
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- {
- SPUOpPlace this_optype, next_optype;
- MachineBasicBlock::iterator J = I;
- J++;
-
- this_optype = getOpPlacement( *I );
- // Look ahead from the instruction after I and stop at the first one that is
- // not a pseudo. next_optype stays 'none' when I is the last instruction and
- // ends up as 'pseudo' when only pseudos follow.
- next_optype = none;
- while (J!=MBB.end()){
- next_optype = getOpPlacement( *J );
- ++J;
- if (next_optype != pseudo )
- break;
- }
-
- // pad: odd(wrong), even(wrong), ...
- // to: nop(corr), odd(corr), even(corr)...
- if( isEvenPlace && this_optype == odd && next_optype == even ) {
- DEBUG( dbgs() <<"Adding NOP before: "; );
- DEBUG( I->dump(); );
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
- // The inserted ENOP occupies the even slot, so I now sits on an odd one.
- isEvenPlace=false;
- }
-
- // pad: even(wrong), odd(wrong), ...
- // to: lnop(corr), even(corr), odd(corr)...
- else if ( !isEvenPlace && this_optype == even && next_optype == odd){
- DEBUG( dbgs() <<"Adding LNOP before: "; );
- DEBUG( I->dump(); );
- BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
- // The inserted LNOP occupies the odd slot, so I now sits on an even one.
- isEvenPlace=true;
- }
-
- // now go to next mem slot
- // (pseudo instructions do not advance the slot)
- if( this_optype != pseudo )
- isEvenPlace = !isEvenPlace;
-
- }
-
- // pad basicblock end
- // If the block would end on an odd slot, add one more nop so that the next
- // block starts on an even slot again.
- if( !isEvenPlace ){
- MachineBasicBlock::iterator J = MBB.end();
- J--;
- if (getOpPlacement( *J ) == odd) {
- // Last instruction is an odd-pipeline one: put an ENOP in front of it.
- DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
- BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
- }
- else {
- // Otherwise append an LNOP after the last instruction (J is end() again).
- J++;
- DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
- BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
- }
- isEvenPlace=true;
- }
-}
-
-// Factory for the nop/lnop filler pass: returns a newly heap-allocated
-// SPUNopFiller constructed from 'tm'.
-FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
- return new SPUNopFiller(tm);
-}
-
-// Classify 'instr' by the functional units of the first itinerary stage of
-// its scheduling class: 0 -> pseudo, 1 -> odd pipeline, 2 -> even pipeline.
-// Any other value trips the assertion and is reported as pseudo.
-SPUNopFiller::SPUOpPlace
-SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
-  const int schedClass = instr.getDesc().getSchedClass();
-  const unsigned units = IID->beginStage(schedClass)->getUnits();
-
-  if (units == 0)
-    return pseudo;
-  if (units == 1)
-    return odd;
-  if (units == 2)
-    return even;
-
-  assert( false && "got unknown FuncUnit\n");
-  return pseudo;
-}
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
deleted file mode 100644
index 6f8deef553..0000000000
--- a/lib/Target/CellSPU/SPUOperands.td
+++ /dev/null
@@ -1,664 +0,0 @@
-//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Cell SPU Instruction Operands:
-//===----------------------------------------------------------------------===//
-
-// TO_IMM32 - Convert an i8/i16 to i32.
-def TO_IMM32 : SDNodeXForm<imm, [{
- return getI32Imm(N->getZExtValue());
-}]>;
-
-// TO_IMM16 - Convert an i8/i32 to i16.
-def TO_IMM16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
-}]>;
-
-
-def LO16 : SDNodeXForm<imm, [{
- unsigned val = N->getZExtValue();
- // Transformation function: get the low 16 bits.
- return getI32Imm(val & 0xffff);
-}]>;
-
-def LO16_vec : SDNodeXForm<scalar_to_vector, [{
-  // Transformation function: get the low 16 bit immediate from a build_vector
-  // node, taking the value from its first operand that is not UNDEF.
-  assert(N->getOpcode() == ISD::BUILD_VECTOR
-         && "LO16_vec got something other than a BUILD_VECTOR");
-
-  SDValue OpVal(0, 0);
-
-  // Pick the first defined operand and stop looking.
-  for (unsigned Idx = 0, NumOps = N->getNumOperands(); Idx != NumOps; ++Idx) {
-    if (N->getOperand(Idx).getOpcode() != ISD::UNDEF) {
-      OpVal = N->getOperand(Idx);
-      break;
-    }
-  }
-
-  assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
-  ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
-  return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
-}]>;
-
-// Transform an immediate, returning the high 16 bits shifted down:
-def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
-// HI16_vec: take the first operand of a build_vector node that is not UNDEF
-// and return its high 16 bits shifted down into the low 16 bits, as an i32
-// immediate.
-def HI16_vec : SDNodeXForm<scalar_to_vector, [{
-  assert(N->getOpcode() == ISD::BUILD_VECTOR
-         && "HI16_vec got something other than a BUILD_VECTOR");
-
-  SDValue OpVal(0, 0);
-
-  // Pick the first defined operand and stop looking.
-  for (unsigned Idx = 0, NumOps = N->getNumOperands(); Idx != NumOps; ++Idx) {
-    if (N->getOperand(Idx).getOpcode() != ISD::UNDEF) {
-      OpVal = N->getOperand(Idx);
-      break;
-    }
-  }
-
-  assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
-  ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
-  return getI32Imm((unsigned)CN->getZExtValue() >> 16);
-}]>;
-
-// simm7 predicate - True if the immediate fits in a 7-bit signed
-// field, i.e. lies in [-64, 63].
-def simm7: PatLeaf<(imm), [{
-  // Compare at full 64-bit width: narrowing to int first would let a wide
-  // constant whose low 32 bits happen to be small slip through.
-  int64_t sextVal = N->getSExtValue();
-  return (sextVal >= -64 && sextVal <= 63);
-}]>;
-
-// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
-// field.
-def uimm7: PatLeaf<(imm), [{
- return (N->getZExtValue() <= 0x7f);
-}]>;
-
-// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
-// field, i.e. lies in [-128, 127].
-def immSExt8 : PatLeaf<(imm), [{
-  // An 8-bit signed field spans -(1 << 7) .. (1 << 7) - 1; the bounds are
-  // checked at full 64-bit width so wide constants are not truncated first.
-  int64_t Value = N->getSExtValue();
-  return (Value >= -(1 << 7) && Value <= (1 << 7) - 1);
-}]>;
-
-// immU8: immediate, unsigned 8-bit quantity
-def immU8 : PatLeaf<(imm), [{
- return (N->getZExtValue() <= 0xff);
-}]>;
-
-// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i32ImmSExt10 : PatLeaf<(imm), [{
- return isI32IntS10Immediate(N);
-}]>;
-
-// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
-// field. Used by RI10Form instructions like 'ldq'.
-def i32ImmUns10 : PatLeaf<(imm), [{
- return isI32IntU10Immediate(N);
-}]>;
-
-// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i16ImmSExt10 : PatLeaf<(imm), [{
- return isI16IntS10Immediate(N);
-}]>;
-
-// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
-// value. Used by RI10Form instructions.
-def i16ImmUns10 : PatLeaf<(imm), [{
- return isI16IntU10Immediate(N);
-}]>;
-
-def immSExt16 : PatLeaf<(imm), [{
- // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- short Ignored;
- return isIntS16Immediate(N, Ignored);
-}]>;
-
-def immZExt16 : PatLeaf<(imm), [{
- // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
- // field.
- return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
-}], LO16>;
-
-def immU16 : PatLeaf<(imm), [{
- // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
- return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
-}]>;
-
-def imm18 : PatLeaf<(imm), [{
-  // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
-  // The zero-extended value is tested directly; casting it to int first would
-  // drop the upper bits and accept constants that do not actually fit.
-  return isUInt<18>(N->getZExtValue());
-}]>;
-
-def lo16 : PatLeaf<(imm), [{
- // lo16 predicate - returns true if the immediate is a 32-bit constant whose
- // high order 16 bits are all zero, i.e. only the low 16 bits may be set.
- // Any other value type yields false.
- if (N->getValueType(0) == MVT::i32) {
- uint32_t val = N->getZExtValue();
- return ((val & 0x0000ffff) == val);
- }
-
- return false;
-}], LO16>;
-
-def hi16 : PatLeaf<(imm), [{
- // hi16 predicate - returns true if the immediate has all zeros in the
- // low order bits and is a 32-bit constant:
- if (N->getValueType(0) == MVT::i32) {
- uint32_t val = uint32_t(N->getZExtValue());
- return ((val & 0xffff0000) == val);
- } else if (N->getValueType(0) == MVT::i64) {
- uint64_t val = N->getZExtValue();
- return ((val & 0xffff0000ULL) == val);
- }
-
- return false;
-}], HI16>;
-
-def bitshift : PatLeaf<(imm), [{
- // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
- // (shift left quadword by bits immediate)
- int64_t Val = N->getZExtValue();
- return (Val > 0 && Val <= 7);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Floating point operands:
-//===----------------------------------------------------------------------===//
-
-// Transform a float, returning the high 16 bits shifted down, as if
-// the float was really an unsigned integer:
-def HI16_f32 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) >> 16);
-}]>;
-
-// Transformation function on floats: get the low 16 bits as if the float was
-// an unsigned integer.
-def LO16_f32 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & 0xffff);
-}]>;
-
-// FPimm_sext16 - Reinterpret the float immediate's bit pattern as an integer
-// and return its low 16 bits sign-extended to 32 bits.
-def FPimm_sext16 : SDNodeXForm<fpimm, [{
-  float fval = N->getValueAPF().convertToFloat();
-  // The bit pattern is unsigned, so "(bits << 16) >> 16" would zero-extend.
-  // Narrow to a signed 16-bit value first so the widening to int
-  // sign-extends.
-  return getI32Imm((int) (short) FloatToBits(fval));
-}]>;
-
-def FPimm_u18 : SDNodeXForm<fpimm, [{
- float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
-}]>;
-
-def fpimmSExt16 : PatLeaf<(fpimm), [{
- short Ignored;
- return isFPS16Immediate(N, Ignored);
-}], FPimm_sext16>;
-
-// Does the SFP constant only have the upper 16 bits set?
-def hi16_f32 : PatLeaf<(fpimm), [{
- if (N->getValueType(0) == MVT::f32) {
- uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
- return ((val & 0xffff0000) == val);
- }
-
- return false;
-}], HI16_f32>;
-
-// Does the SFP constant fit into 18 bits?
-def fpimm18 : PatLeaf<(fpimm), [{
- if (N->getValueType(0) == MVT::f32) {
- uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
- return isUInt<18>(Value);
- }
-
- return false;
-}], FPimm_u18>;
-
-//===----------------------------------------------------------------------===//
-// 64-bit operands (TODO):
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// build_vector operands:
-//===----------------------------------------------------------------------===//
-
-// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
-// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
-// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
-def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
-}]>;
-
-// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
-// load, works in conjunction with its transform function. N.B.: This relies on the
-// incoming constant being a 16-bit quantity, where the upper and lower bytes
-// are EXACTLY the same (e.g., 0x2a2a)
-def v16i8SExt8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
-}], v16i8SExt8Imm_xform>;
-
-// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
-// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
-// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
-def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
-}]>;
-
-// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
-// load, works in conjunction with its transform function. N.B.: This relies the
-// incoming constant being a 16-bit quantity, where the upper and lower bytes
-// are EXACTLY the same (e.g., 0x2a2a)
-def v16i8U8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
-}], v16i8U8Imm_xform>;
-
-// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt8Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16SExt8Imm_xform>;
-
-// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16SExt10Imm_xform>;
-
-// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned
-// immediate constant load for v8i16 vectors.
-def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant
-// load, works in conjunction with its transform function.
-def v8i16Uns10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16Uns10Imm_xform>;
-
-// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v8i16 vectors.
-def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
-}]>;
-
-// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v8i16SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
-}], v8i16Uns16Imm_xform>;
-
-// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v4i32 vectors.
-def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v4i32SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32SExt10Imm_xform>;
-
-// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
-// immediate constant load for v4i32 vectors.
-def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
-// load, works in conjunction with its transform function.
-def v4i32Uns10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32Uns10Imm_xform>;
-
-// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v4i32 vectors.
-def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v4i32SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32SExt16Imm_xform>;
-
-// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
-// immediate constant load for v4i32 vectors.
-def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
-}]>;
-
-// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
-// works in conjunction with its transform function.
-def v4i32Uns18Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], v4i32Uns18Imm_xform>;
-
-// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
-// load.
-def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
-}]>;
-
-/// immILHUvec: Predicate test for a ILHU constant vector.
-def immILHUvec: PatLeaf<(build_vector), [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
-}], ILHUvec_get_imm>;
-
-// Catch-all for any other i32 vector constants
-def v4i32_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_v4i32_imm(N, *CurDAG);
-}]>;
-
-def v4i32Imm: PatLeaf<(build_vector), [{
- return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
-}], v4i32_get_imm>;
-
-// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
-// immediate constant load for v2i64 vectors.
-def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v2i64SExt10Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64SExt10Imm_xform>;
-
-// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
-// immediate constant load for v2i64 vectors.
-def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
-// load, works in conjunction with its transform function.
-def v2i64SExt16Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64SExt16Imm_xform>;
-
-// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
-// immediate constant load for v2i64 vectors.
-def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
-}]>;
-
-// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
-// works in conjunction with its transform function.
-def v2i64Uns18Imm: PatLeaf<(build_vector), [{
- return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], v2i64Uns18Imm_xform>;
-
-/// immILHUvec: Predicate test for a ILHU constant vector.
-def immILHUvec_i64: PatLeaf<(build_vector), [{
- return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
-}], ILHUvec_get_imm>;
-
-// Catch-all for any other i32 vector constants
-def v2i64_get_imm: SDNodeXForm<build_vector, [{
- return SPU::get_v2i64_imm(N, *CurDAG);
-}]>;
-
-def v2i64Imm: PatLeaf<(build_vector), [{
- return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
-}], v2i64_get_imm>;
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-
-def s7imm: Operand<i8> {
- let PrintMethod = "printS7ImmOperand";
-}
-
-def s7imm_i8: Operand<i8> {
- let PrintMethod = "printS7ImmOperand";
-}
-
-def u7imm: Operand<i16> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-def u7imm_i8: Operand<i8> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-def u7imm_i32: Operand<i32> {
- let PrintMethod = "printU7ImmOperand";
-}
-
-// Halfword, signed 10-bit constant
-def s10imm : Operand<i16> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i8: Operand<i8> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i32: Operand<i32> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-def s10imm_i64: Operand<i64> {
- let PrintMethod = "printS10ImmOperand";
-}
-
-// Unsigned 10-bit integers:
-def u10imm: Operand<i16> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def u10imm_i8: Operand<i8> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def u10imm_i32: Operand<i32> {
- let PrintMethod = "printU10ImmOperand";
-}
-
-def s16imm : Operand<i16> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i8: Operand<i8> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i32: Operand<i32> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_i64: Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_f32: Operand<f32> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def s16imm_f64: Operand<f64> {
- let PrintMethod = "printS16ImmOperand";
-}
-
-def u16imm_i64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u16imm_i32 : Operand<i32> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def u16imm : Operand<i16> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def f16imm : Operand<f32> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-def s18imm : Operand<i32> {
- let PrintMethod = "printS18ImmOperand";
-}
-
-def u18imm : Operand<i32> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def u18imm_i64 : Operand<i64> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def f18imm : Operand<f32> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-def f18imm_f64 : Operand<f64> {
- let PrintMethod = "printU18ImmOperand";
-}
-
-// Negated 7-bit halfword rotate immediate operands
-def rothNeg7imm : Operand<i32> {
- let PrintMethod = "printROTHNeg7Imm";
-}
-
-def rothNeg7imm_i16 : Operand<i16> {
- let PrintMethod = "printROTHNeg7Imm";
-}
-
-// Negated 7-bit word rotate immediate operands
-def rotNeg7imm : Operand<i32> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def rotNeg7imm_i16 : Operand<i16> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def rotNeg7imm_i8 : Operand<i8> {
- let PrintMethod = "printROTNeg7Imm";
-}
-
-def target : Operand<OtherVT> {
- let PrintMethod = "printBranchOperand";
-}
-
-// Absolute address call target
-def calltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
- let MIOperandInfo = (ops u18imm:$calldest);
-}
-
-// PC relative call target
-def relcalltarget : Operand<iPTR> {
- let PrintMethod = "printPCRelativeOperand";
- let MIOperandInfo = (ops s16imm:$calldest);
-}
-
-// Branch targets:
-def brtarget : Operand<OtherVT> {
- let PrintMethod = "printPCRelativeOperand";
-}
-
-// Hint for branch target
-def hbrtarget : Operand<OtherVT> {
- let PrintMethod = "printHBROperand";
-}
-
-// Indirect call target
-def indcalltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
- let MIOperandInfo = (ops ptr_rc:$calldest);
-}
-
-def symbolHi: Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-
-def symbolLo: Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
-
-def symbolLSA: Operand<i32> {
- let PrintMethod = "printSymbolLSA";
-}
-
-// Shuffle address memory operaand [s7imm(reg) d-format]
-def shufaddr : Operand<iPTR> {
- let PrintMethod = "printShufAddr";
- let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
-}
-
-// memory s10imm(reg) operand
-def dformaddr : Operand<iPTR> {
- let PrintMethod = "printDFormAddr";
- let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
-}
-
-// 256K local store address
-// N.B.: The tblgen code generator expects to have two operands, an offset
-// and a pointer. Of these, only the immediate is actually used.
-def addr256k : Operand<iPTR> {
- let PrintMethod = "printAddr256K";
- let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
-}
-
-// memory s18imm(reg) operand
-def memri18 : Operand<iPTR> {
- let PrintMethod = "printMemRegImmS18";
- let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
-}
-
-// memory register + register operand
-def memrr : Operand<iPTR> {
- let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
-}
-
-// Define SPU-specific addressing modes: These come in three basic
-// flavors:
-//
-// D-form : [r+I10] (10-bit signed offset + reg)
-// X-form : [r+r] (reg+reg)
-// A-form : abs (256K LSA offset)
-// D-form(2): [r+I7] (7-bit signed offset + reg)
-
-def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
- [], [SDNPWantRoot]>;
-def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
- [], [SDNPWantRoot]>;
-def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
- [], [SDNPWantRoot]>;
-def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
- [], [SDNPWantRoot]>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
deleted file mode 100644
index e6c872d0bb..0000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "reginfo"
-#include "SPURegisterInfo.h"
-#include "SPU.h"
-#include "SPUInstrBuilder.h"
-#include "SPUSubtarget.h"
-#include "SPUMachineFunction.h"
-#include "SPUFrameLowering.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include <cstdlib>
-
-#define GET_REGINFO_TARGET_DESC
-#include "SPUGenRegisterInfo.inc"
-
-using namespace llvm;
-
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// PPC::F14, return the number that it corresponds to (e.g. 14).
-unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
- using namespace SPU;
- switch (RegEnum) {
- case SPU::R0: return 0;
- case SPU::R1: return 1;
- case SPU::R2: return 2;
- case SPU::R3: return 3;
- case SPU::R4: return 4;
- case SPU::R5: return 5;
- case SPU::R6: return 6;
- case SPU::R7: return 7;
- case SPU::R8: return 8;
- case SPU::R9: return 9;
- case SPU::R10: return 10;
- case SPU::R11: return 11;
- case SPU::R12: return 12;
- case SPU::R13: return 13;
- case SPU::R14: return 14;
- case SPU::R15: return 15;
- case SPU::R16: return 16;
- case SPU::R17: return 17;
- case SPU::R18: return 18;
- case SPU::R19: return 19;
- case SPU::R20: return 20;
- case SPU::R21: return 21;
- case SPU::R22: return 22;
- case SPU::R23: return 23;
- case SPU::R24: return 24;
- case SPU::R25: return 25;
- case SPU::R26: return 26;
- case SPU::R27: return 27;
- case SPU::R28: return 28;
- case SPU::R29: return 29;
- case SPU::R30: return 30;
- case SPU::R31: return 31;
- case SPU::R32: return 32;
- case SPU::R33: return 33;
- case SPU::R34: return 34;
- case SPU::R35: return 35;
- case SPU::R36: return 36;
- case SPU::R37: return 37;
- case SPU::R38: return 38;
- case SPU::R39: return 39;
- case SPU::R40: return 40;
- case SPU::R41: return 41;
- case SPU::R42: return 42;
- case SPU::R43: return 43;
- case SPU::R44: return 44;
- case SPU::R45: return 45;
- case SPU::R46: return 46;
- case SPU::R47: return 47;
- case SPU::R48: return 48;
- case SPU::R49: return 49;
- case SPU::R50: return 50;
- case SPU::R51: return 51;
- case SPU::R52: return 52;
- case SPU::R53: return 53;
- case SPU::R54: return 54;
- case SPU::R55: return 55;
- case SPU::R56: return 56;
- case SPU::R57: return 57;
- case SPU::R58: return 58;
- case SPU::R59: return 59;
- case SPU::R60: return 60;
- case SPU::R61: return 61;
- case SPU::R62: return 62;
- case SPU::R63: return 63;
- case SPU::R64: return 64;
- case SPU::R65: return 65;
- case SPU::R66: return 66;
- case SPU::R67: return 67;
- case SPU::R68: return 68;
- case SPU::R69: return 69;
- case SPU::R70: return 70;
- case SPU::R71: return 71;
- case SPU::R72: return 72;
- case SPU::R73: return 73;
- case SPU::R74: return 74;
- case SPU::R75: return 75;
- case SPU::R76: return 76;
- case SPU::R77: return 77;
- case SPU::R78: return 78;
- case SPU::R79: return 79;
- case SPU::R80: return 80;
- case SPU::R81: return 81;
- case SPU::R82: return 82;
- case SPU::R83: return 83;
- case SPU::R84: return 84;
- case SPU::R85: return 85;
- case SPU::R86: return 86;
- case SPU::R87: return 87;
- case SPU::R88: return 88;
- case SPU::R89: return 89;
- case SPU::R90: return 90;
- case SPU::R91: return 91;
- case SPU::R92: return 92;
- case SPU::R93: return 93;
- case SPU::R94: return 94;
- case SPU::R95: return 95;
- case SPU::R96: return 96;
- case SPU::R97: return 97;
- case SPU::R98: return 98;
- case SPU::R99: return 99;
- case SPU::R100: return 100;
- case SPU::R101: return 101;
- case SPU::R102: return 102;
- case SPU::R103: return 103;
- case SPU::R104: return 104;
- case SPU::R105: return 105;
- case SPU::R106: return 106;
- case SPU::R107: return 107;
- case SPU::R108: return 108;
- case SPU::R109: return 109;
- case SPU::R110: return 110;
- case SPU::R111: return 111;
- case SPU::R112: return 112;
- case SPU::R113: return 113;
- case SPU::R114: return 114;
- case SPU::R115: return 115;
- case SPU::R116: return 116;
- case SPU::R117: return 117;
- case SPU::R118: return 118;
- case SPU::R119: return 119;
- case SPU::R120: return 120;
- case SPU::R121: return 121;
- case SPU::R122: return 122;
- case SPU::R123: return 123;
- case SPU::R124: return 124;
- case SPU::R125: return 125;
- case SPU::R126: return 126;
- case SPU::R127: return 127;
- default:
- report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
- }
-}
-
-SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
- const TargetInstrInfo &tii) :
- SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
-{
-}
-
-/// getPointerRegClass - Return the register class to use to hold pointers.
-/// This is used for addressing modes.
-const TargetRegisterClass *
-SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
- return &SPU::R32CRegClass;
-}
-
-const uint16_t *
-SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
-{
- // Cell ABI calling convention
- static const uint16_t SPU_CalleeSaveRegs[] = {
- SPU::R80, SPU::R81, SPU::R82, SPU::R83,
- SPU::R84, SPU::R85, SPU::R86, SPU::R87,
- SPU::R88, SPU::R89, SPU::R90, SPU::R91,
- SPU::R92, SPU::R93, SPU::R94, SPU::R95,
- SPU::R96, SPU::R97, SPU::R98, SPU::R99,
- SPU::R100, SPU::R101, SPU::R102, SPU::R103,
- SPU::R104, SPU::R105, SPU::R106, SPU::R107,
- SPU::R108, SPU::R109, SPU::R110, SPU::R111,
- SPU::R112, SPU::R113, SPU::R114, SPU::R115,
- SPU::R116, SPU::R117, SPU::R118, SPU::R119,
- SPU::R120, SPU::R121, SPU::R122, SPU::R123,
- SPU::R124, SPU::R125, SPU::R126, SPU::R127,
- SPU::R2, /* environment pointer */
- SPU::R1, /* stack pointer */
- SPU::R0, /* link register */
- 0 /* end */
- };
-
- return SPU_CalleeSaveRegs;
-}
-
-/*!
- R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
- generally unused) are the Cell's reserved registers
- */
-BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(SPU::R0); // LR
- Reserved.set(SPU::R1); // SP
- Reserved.set(SPU::R2); // environment pointer
- return Reserved;
-}
-
-//===----------------------------------------------------------------------===//
-// Stack Frame Processing methods
-//===----------------------------------------------------------------------===//
-
-//--------------------------------------------------------------------------
-void
-SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I)
- const
-{
- // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
-void
-SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
-{
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- DebugLoc dl = II->getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- MachineOperand &SPOp = MI.getOperand(i);
- int FrameIndex = SPOp.getIndex();
-
- // Now add the frame object offset to the offset from r1.
- int Offset = MFI->getObjectOffset(FrameIndex);
-
- // Most instructions, except for generated FrameIndex additions using AIr32
- // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
- // immediate in operand 2.
- unsigned OpNo = 1;
- if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
- OpNo = 2;
-
- MachineOperand &MO = MI.getOperand(OpNo);
-
- // Offset is biased by $lr's slot at the bottom.
- Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
- assert((Offset & 0xf) == 0
- && "16-byte alignment violated in eliminateFrameIndex");
-
- // Replace the FrameIndex with base register with $sp (aka $r1)
- SPOp.ChangeToRegister(SPU::R1, false);
-
- // if 'Offset' doesn't fit to the D-form instruction's
- // immediate, convert the instruction to X-form
- // if the instruction is not an AI (which takes a s10 immediate), assume
- // it is a load/store that can take a s14 immediate
- if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
- || !isInt<14>(Offset)) {
- int newOpcode = convertDFormToXForm(MI.getOpcode());
- unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
- BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
- .addImm(Offset);
- BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
- .addReg(tmpReg, RegState::Kill)
- .addReg(SPU::R1);
- // remove the replaced D-form instruction
- MBB.erase(II);
- } else {
- MO.ChangeToImmediate(Offset);
- }
-}
-
-unsigned
-SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
-{
- return SPU::R1;
-}
-
-int
-SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
-{
- switch(dFormOpcode)
- {
- case SPU::AIr32: return SPU::Ar32;
- case SPU::LQDr32: return SPU::LQXr32;
- case SPU::LQDr128: return SPU::LQXr128;
- case SPU::LQDv16i8: return SPU::LQXv16i8;
- case SPU::LQDv4i32: return SPU::LQXv4i32;
- case SPU::LQDv4f32: return SPU::LQXv4f32;
- case SPU::STQDr32: return SPU::STQXr32;
- case SPU::STQDr128: return SPU::STQXr128;
- case SPU::STQDv16i8: return SPU::STQXv16i8;
- case SPU::STQDv4i32: return SPU::STQXv4i32;
- case SPU::STQDv4f32: return SPU::STQXv4f32;
-
- default: assert( false && "Unhandled D to X-form conversion");
- }
- // default will assert, but need to return something to keep the
- // compiler happy.
- return dFormOpcode;
-}
-
-// TODO this is already copied from PPC. Could this convenience function
-// be moved to the RegScavenger class?
-unsigned
-SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) const
-{
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- assert( Reg && "Register scavenger failed");
- return Reg;
-}
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
deleted file mode 100644
index e9f9aba63a..0000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Cell SPU implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_REGISTERINFO_H
-#define SPU_REGISTERINFO_H
-
-#include "SPU.h"
-
-#define GET_REGINFO_HEADER
-#include "SPUGenRegisterInfo.inc"
-
-namespace llvm {
- class SPUSubtarget;
- class TargetInstrInfo;
- class Type;
-
- class SPURegisterInfo : public SPUGenRegisterInfo {
- private:
- const SPUSubtarget &Subtarget;
- const TargetInstrInfo &TII;
-
- //! Predicate: Does the machine function use the link register?
- bool usesLR(MachineFunction &MF) const;
-
- public:
- SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
-
- //! Translate a register's enum value to a register number
- /*!
- This method translates a register's enum value to it's regiser number,
- e.g. SPU::R14 -> 14.
- */
- static unsigned getRegisterNumbering(unsigned RegEnum);
-
- /// getPointerRegClass - Return the register class to use to hold pointers.
- /// This is used for addressing modes.
- virtual const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
-
- /// After allocating this many registers, the allocator should feel
- /// register pressure. The value is a somewhat random guess, based on the
- /// number of non callee saved registers in the C calling convention.
- virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
- MachineFunction &MF) const{
- return 50;
- }
-
- //! Return the array of callee-saved registers
- virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
-
- //! Allow for scavenging, so we can get scratch registers when needed.
- virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
- { return true; }
-
- //! Enable tracking of liveness after register allocation, since register
- // scavenging is enabled.
- virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
- { return true; }
-
- //! Return the reserved registers
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- //! Eliminate the call frame setup pseudo-instructions
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
- //! Convert frame indicies into machine operands
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS = NULL) const;
-
- //! Get the stack frame register (SP, aka R1)
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- //------------------------------------------------------------------------
- // New methods added:
- //------------------------------------------------------------------------
-
- //! Convert D-form load/store to X-form load/store
- /*!
- Converts a regiser displacement load/store into a register-indexed
- load/store for large stack frames, when the stack frame exceeds the
- range of a s10 displacement.
- */
- int convertDFormToXForm(int dFormOpcode) const;
-
- //! Acquire an unused register in an emergency.
- unsigned findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) const;
-
- };
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
deleted file mode 100644
index f27b042edd..0000000000
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ /dev/null
@@ -1,183 +0,0 @@
-//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-class SPUReg<string n> : Register<n> {
- let Namespace = "SPU";
-}
-
-// The SPU's registers are all 128-bits wide, which makes specifying the
-// registers relatively easy, if relatively mundane:
-
-class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
- field bits<7> Num = num;
-}
-
-def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
-def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
-def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
-def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
-def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
-def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
-def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
-def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
-def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
-def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
-def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
-def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
-def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
-def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
-def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
-def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
-def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
-def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
-def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
-def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
-def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
-def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
-def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
-def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
-def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
-def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
-def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
-def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
-def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
-def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
-def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
-def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
-def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
-def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
-def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
-def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
-def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
-def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
-def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
-def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
-def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
-def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
-def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
-def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
-def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
-def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
-def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
-def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
-def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
-def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
-def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
-def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
-def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
-def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
-def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
-def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
-def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
-def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
-def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
-def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
-def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
-def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
-def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
-def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
-def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
-def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
-def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
-def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
-def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
-def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
-def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
-def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
-def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
-def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
-def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
-def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
-def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
-def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
-def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
-def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
-def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
-def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
-def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
-def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
-def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
-def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
-def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
-def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
-def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
-def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
-def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
-def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
-def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
-def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
-def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
-def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
-def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
-def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
-def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
-def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
-def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
-def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
-def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
-def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
-def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
-def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
-def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
-def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
-def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
-def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
-def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
-def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
-def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
-def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
-def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
-def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
-def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
-def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
-def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
-def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
-def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
-def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
-def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
-def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
-def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
-def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
-def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
-def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
-
-/* Need floating point status register here: */
-/* def FPCSR : ... */
-
-// The SPU's registers as 128-bit wide entities, and can function as general
-// purpose registers, where the operands are in the "preferred slot":
-// The non-volatile registers are allocated in reverse order, like PPC does it.
-def GPRC : RegisterClass<"SPU", [i128], 128,
- (add (sequence "R%u", 0, 79),
- (sequence "R%u", 127, 80))>;
-
-// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
-def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
-
-// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
-def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
-
-// The SPU's registers as 32-bit wide (word) "preferred slot":
-def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
-
-// The SPU's registers as single precision floating point "preferred slot":
-def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
-
-// The SPU's registers as 16-bit wide (halfword) "preferred slot":
-def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
-
-// The SPU's registers as 8-bit wide (byte) "preferred slot":
-def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
-
-// The SPU's registers as vector registers:
-def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
- (add GPRC)>;
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
deleted file mode 100644
index e557ed340a..0000000000
--- a/lib/Target/CellSPU/SPURegisterNames.h
+++ /dev/null
@@ -1,19 +0,0 @@
-//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_REGISTER_NAMES_H
-#define SPU_REGISTER_NAMES_H
-
-// Define symbolic names for Cell registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "SPUGenRegisterInfo.inc"
-
-#endif
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
deleted file mode 100644
index 9ccd0844e4..0000000000
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Even pipeline:
-
-def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000)
-def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100)
-
-//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Cell SPU
-//===----------------------------------------------------------------------===//
-
-def LoadStore : InstrItinClass; // ODD_UNIT
-def BranchHints : InstrItinClass; // ODD_UNIT
-def BranchResolv : InstrItinClass; // ODD_UNIT
-def ChanOpSPR : InstrItinClass; // ODD_UNIT
-def ShuffleOp : InstrItinClass; // ODD_UNIT
-def SelectOp : InstrItinClass; // ODD_UNIT
-def GatherOp : InstrItinClass; // ODD_UNIT
-def LoadNOP : InstrItinClass; // ODD_UNIT
-def ExecNOP : InstrItinClass; // EVEN_UNIT
-def SPrecFP : InstrItinClass; // EVEN_UNIT
-def DPrecFP : InstrItinClass; // EVEN_UNIT
-def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
-def ByteOp : InstrItinClass; // EVEN_UNIT
-def IntegerOp : InstrItinClass; // EVEN_UNIT
-def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
-def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
-def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
-def ImmLoad : InstrItinClass; // EVEN_UNIT
-
-/* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
- InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
- InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
- InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
- InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
- InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
- InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
- InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
- InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
- InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
- InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
- ]>;
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
deleted file mode 100644
index 5732fd43cd..0000000000
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "cellspu-selectiondag-info"
-#include "SPUTargetMachine.h"
-using namespace llvm;
-
-SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
-}
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
deleted file mode 100644
index 39257d92c4..0000000000
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CELLSPUSELECTIONDAGINFO_H
-#define CELLSPUSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class SPUTargetMachine;
-
-class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
- ~SPUSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
deleted file mode 100644
index eec2d250be..0000000000
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUSubtarget.h"
-#include "SPU.h"
-#include "SPURegisterInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "SPUGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS) :
- SPUGenSubtargetInfo(TT, CPU, FS),
- StackAlignment(16),
- ProcDirective(SPU::DEFAULT_PROC),
- UseLargeMem(false)
-{
- // Should be the target SPU processor type. For now, since there's only
- // one, simply default to the current "v0" default:
- std::string default_cpu("v0");
-
- // Parse features string.
- ParseSubtargetFeatures(default_cpu, FS);
-
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(default_cpu);
-}
-
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void SPUSubtarget::SetJITMode() {
-}
-
-/// Enable PostRA scheduling for optimization levels -O2 and -O3.
-bool SPUSubtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
- // CriticalPathRCs seems to be the set of
- // RegisterClasses that antidep breakings are performed for.
- // Do it for all register classes
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&SPU::R8CRegClass);
- CriticalPathRCs.push_back(&SPU::R16CRegClass);
- CriticalPathRCs.push_back(&SPU::R32CRegClass);
- CriticalPathRCs.push_back(&SPU::R32FPRegClass);
- CriticalPathRCs.push_back(&SPU::R64CRegClass);
- CriticalPathRCs.push_back(&SPU::VECREGRegClass);
- return OptLevel >= CodeGenOpt::Default;
-}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
deleted file mode 100644
index 27d28b22dd..0000000000
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ /dev/null
@@ -1,97 +0,0 @@
-//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CELLSUBTARGET_H
-#define CELLSUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "SPUGenSubtargetInfo.inc"
-
-namespace llvm {
- class GlobalValue;
- class StringRef;
-
- namespace SPU {
- enum {
- PROC_NONE,
- DEFAULT_PROC
- };
- }
-
- class SPUSubtarget : public SPUGenSubtargetInfo {
- protected:
- /// stackAlignment - The minimum alignment known to hold of the stack frame
- /// on entry to the function and which must be maintained by every function.
- unsigned StackAlignment;
-
- /// Selected instruction itineraries (one entry per itinerary class.)
- InstrItineraryData InstrItins;
-
- /// Which SPU processor (this isn't really used, but it's there to keep
- /// the C compiler happy)
- unsigned ProcDirective;
-
- /// Use (assume) large memory -- effectively disables the LQA/STQA
- /// instructions that assume 256K local store.
- bool UseLargeMem;
-
- public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- ///
- SPUSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- /// SetJITMode - This is called to inform the subtarget info that we are
- /// producing code for the JIT.
- void SetJITMode();
-
- /// getStackAlignment - Returns the minimum alignment known to hold of the
- /// stack frame on entry to the function and which must be maintained by
- /// every function for this subtarget.
- unsigned getStackAlignment() const { return StackAlignment; }
-
- /// getInstrItins - Return the instruction itineraries based on subtarget
- /// selection.
- const InstrItineraryData &getInstrItineraryData() const {
- return InstrItins;
- }
-
- /// Use large memory addressing predicate
- bool usingLargeMem() const {
- return UseLargeMem;
- }
-
- /// getDataLayoutString - Return the pointer size and type alignment
- /// properties of this subtarget.
- const char *getDataLayoutString() const {
- return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
- "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
- "-s:128:128-n32:64";
- }
-
- bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const;
- };
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
deleted file mode 100644
index 918316572a..0000000000
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUTargetMachine.h"
-#include "SPU.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeCellSPUTarget() {
- // Register the target.
- RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
-}
-
-const std::pair<unsigned, int> *
-SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
- NumEntries = 1;
- return &LR[0];
-}
-
-SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DL(Subtarget.getDataLayoutString()),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this),
- TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()),
- STTI(&TLInfo), VTTI(&TLInfo) {
-}
-
-//===----------------------------------------------------------------------===//
-// Pass Pipeline Configuration
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// SPU Code Generator Pass Configuration Options.
-class SPUPassConfig : public TargetPassConfig {
-public:
- SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- SPUTargetMachine &getSPUTargetMachine() const {
- return getTM<SPUTargetMachine>();
- }
-
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
-};
-} // namespace
-
-TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new SPUPassConfig(this, PM);
-}
-
-bool SPUPassConfig::addInstSelector() {
- // Install an instruction selector.
- addPass(createSPUISelDag(getSPUTargetMachine()));
- return false;
-}
-
-// passes to run just before printing the assembly
-bool SPUPassConfig::addPreEmitPass() {
- // load the TCE instruction scheduler, if available via
- // loaded plugins
- typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
- BuilderFunc schedulerCreator =
- (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
- "createTCESchedulerPass");
- if (schedulerCreator != NULL)
- addPass(schedulerCreator("cellspu"));
-
- //align instructions with nops/lnops for dual issue
- addPass(createSPUNopFillerPass(getSPUTargetMachine()));
- return true;
-}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
deleted file mode 100644
index 7f53ea6fbe..0000000000
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ /dev/null
@@ -1,96 +0,0 @@
-//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the CellSPU-specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SPU_TARGETMACHINE_H
-#define SPU_TARGETMACHINE_H
-
-#include "SPUSubtarget.h"
-#include "SPUInstrInfo.h"
-#include "SPUISelLowering.h"
-#include "SPUSelectionDAGInfo.h"
-#include "SPUFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
-
-namespace llvm {
-
-/// SPUTargetMachine
-///
-class SPUTargetMachine : public LLVMTargetMachine {
- SPUSubtarget Subtarget;
- const DataLayout DL;
- SPUInstrInfo InstrInfo;
- SPUFrameLowering FrameLowering;
- SPUTargetLowering TLInfo;
- SPUSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
- ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
-public:
- SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
-
- /// Return the subtarget implementation object
- virtual const SPUSubtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
- virtual const SPUInstrInfo *getInstrInfo() const {
- return &InstrInfo;
- }
- virtual const SPUFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- /*!
- \note Cell SPU does not support JIT today. It could support JIT at some
- point.
- */
- virtual TargetJITInfo *getJITInfo() {
- return NULL;
- }
-
- virtual const SPUTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual const SPURegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const DataLayout *getDataLayout() const {
- return &DL;
- }
-
- virtual const InstrItineraryData *getInstrItineraryData() const {
- return &InstrItins;
- }
- virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
- return &STTI;
- }
- virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
- return &VTTI;
- }
-
- // Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 6a98f95db6..0000000000
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMCellSPUInfo
- CellSPUTargetInfo.cpp
- )
-
-add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
deleted file mode 100644
index 84aadfad6f..0000000000
--- a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheCellSPUTarget;
-
-extern "C" void LLVMInitializeCellSPUTargetInfo() {
- RegisterTarget<Triple::cellspu>
- X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
-}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 0f3efd8345..5c909903f9 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -518,7 +518,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
Out << "Attrs.push_back(PAWI);";
nl(Out);
}
- Out << name << "_PAL = AttrListPtr::get(Attrs);";
+ Out << name << "_PAL = AttrListPtr::get(mod->getContext(), Attrs);";
nl(Out);
out(); nl(Out);
Out << '}'; nl(Out);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1c891f14d8..aec1ed327f 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -608,7 +608,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
// TODO: Put this function along with the other isS* functions in
// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonImmediates.td.
+// functions defined in HexagonOperands.td.
static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
ConstantSDNode *N = cast<ConstantSDNode>(S);
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index a64c7a1816..71c620b131 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -27,6 +27,34 @@ def TypeSYSTEM : Type<7>;
def TypeXTYPE : Type<8>;
def TypeMARKER : Type<31>;
+// Maintain list of valid subtargets for each instruction.
+class SubTarget<bits<4> value> {
+ bits<4> Value = value;
+}
+
+def HasV2SubT : SubTarget<0xf>;
+def HasV2SubTOnly : SubTarget<0x1>;
+def NoV2SubT : SubTarget<0x0>;
+def HasV3SubT : SubTarget<0xe>;
+def HasV3SubTOnly : SubTarget<0x2>;
+def NoV3SubT : SubTarget<0x1>;
+def HasV4SubT : SubTarget<0xc>;
+def NoV4SubT : SubTarget<0x3>;
+def HasV5SubT : SubTarget<0x8>;
+def NoV5SubT : SubTarget<0x7>;
+
+// Addressing modes for load/store instructions
+class AddrModeType<bits<4> value> {
+ bits<4> Value = value;
+}
+
+def NoAddrMode : AddrModeType<0>; // No addressing mode
+def Absolute : AddrModeType<1>; // Absolute addressing mode
+def AbsoluteSet : AddrModeType<2>; // Absolute set addressing mode
+def BaseImmOffset : AddrModeType<3>; // Indirect with offset
+def BaseLongOffset : AddrModeType<4>; // Indirect with long offset
+def BaseRegOffset : AddrModeType<5>; // Indirect with register offset
+
//===----------------------------------------------------------------------===//
// Instruction Class Declaration +
//===----------------------------------------------------------------------===//
@@ -55,10 +83,38 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Predicated instructions.
bits<1> isPredicated = 0;
let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{7} = isPredicatedNew;
+
+ // Stores that can be newified.
+ bits<1> isNVStorable = 0;
+ let TSFlags{8} = isNVStorable;
- // Dot new value store instructions.
+ // New-value store instructions.
bits<1> isNVStore = 0;
- let TSFlags{8} = isNVStore;
+ let TSFlags{9} = isNVStore;
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{10} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{11} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{14-12} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{15} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{20-16} = opExtentBits; //Number of bits of range before extending.
+
+ // If an instruction is valid on a subtarget (v2-v5), set the corresponding
+ // bit from validSubTargets. v2 is the least significant bit.
+ // By default, instruction is valid on all subtargets.
+ SubTarget validSubTargets = HasV2SubT;
+ let TSFlags{24-21} = validSubTargets.Value;
+
+ // Addressing mode for load/store instructions.
+ AddrModeType addrMode = NoAddrMode;
+ let TSFlags{28-25} = addrMode.Value;
// Fields used for relation models.
string BaseOpcode = "";
@@ -66,7 +122,10 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string PredSense = "";
string PNewValue = "";
string InputType = ""; // Input is "imm" or "reg" type.
- // *** The code above must match HexagonBaseInfo.h ***
+ string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
+ string isFloat = "false"; // Set to "true" for the floating-point load/store.
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 8435440308..c9e0025453 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -941,42 +941,36 @@ unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
case Hexagon::TFR_FI:
return Hexagon::TFR_FI_immext_V4;
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
case Hexagon::MEMw_ADDi_MEM_V4 :
case Hexagon::MEMw_SUBi_MEM_V4 :
case Hexagon::MEMw_ADDr_MEM_V4 :
case Hexagon::MEMw_SUBr_MEM_V4 :
case Hexagon::MEMw_ANDr_MEM_V4 :
case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
case Hexagon::MEMh_ADDi_MEM_V4 :
case Hexagon::MEMh_SUBi_MEM_V4 :
case Hexagon::MEMh_ADDr_MEM_V4 :
case Hexagon::MEMh_SUBr_MEM_V4 :
case Hexagon::MEMh_ANDr_MEM_V4 :
case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
case Hexagon::MEMb_ADDi_MEM_V4 :
case Hexagon::MEMb_SUBi_MEM_V4 :
case Hexagon::MEMb_ADDr_MEM_V4 :
@@ -2391,14 +2385,12 @@ isValidOffset(const int Opcode, const int Offset) const {
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
case Hexagon::MEMw_ADDi_MEM_V4 :
case Hexagon::MEMw_SUBi_MEM_V4 :
case Hexagon::MEMw_ADDr_MEM_V4 :
@@ -2408,14 +2400,12 @@ isValidOffset(const int Opcode, const int Offset) const {
assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
return (0 <= Offset && Offset <= 255);
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
case Hexagon::MEMh_ADDi_MEM_V4 :
case Hexagon::MEMh_SUBi_MEM_V4 :
case Hexagon::MEMh_ADDr_MEM_V4 :
@@ -2425,14 +2415,12 @@ isValidOffset(const int Opcode, const int Offset) const {
assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
return (0 <= Offset && Offset <= 127);
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
case Hexagon::MEMb_ADDi_MEM_V4 :
case Hexagon::MEMb_SUBi_MEM_V4 :
case Hexagon::MEMb_ADDr_MEM_V4 :
@@ -2491,42 +2479,36 @@ isMemOp(const MachineInstr *MI) const {
switch (MI->getOpcode())
{
default: return false;
- case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
case Hexagon::MEMw_ORr_indexed_MEM_V4 :
- case Hexagon::MEMw_ADDSUBi_MEM_V4 :
case Hexagon::MEMw_ADDi_MEM_V4 :
case Hexagon::MEMw_SUBi_MEM_V4 :
case Hexagon::MEMw_ADDr_MEM_V4 :
case Hexagon::MEMw_SUBr_MEM_V4 :
case Hexagon::MEMw_ANDr_MEM_V4 :
case Hexagon::MEMw_ORr_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
case Hexagon::MEMh_ORr_indexed_MEM_V4 :
- case Hexagon::MEMh_ADDSUBi_MEM_V4 :
case Hexagon::MEMh_ADDi_MEM_V4 :
case Hexagon::MEMh_SUBi_MEM_V4 :
case Hexagon::MEMh_ADDr_MEM_V4 :
case Hexagon::MEMh_SUBr_MEM_V4 :
case Hexagon::MEMh_ANDr_MEM_V4 :
case Hexagon::MEMh_ORr_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
case Hexagon::MEMb_ORr_indexed_MEM_V4 :
- case Hexagon::MEMb_ADDSUBi_MEM_V4 :
case Hexagon::MEMb_ADDi_MEM_V4 :
case Hexagon::MEMb_SUBi_MEM_V4 :
case Hexagon::MEMb_ADDr_MEM_V4 :
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 1d4a7060ad..ca21dbb4e1 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
include "HexagonInstrFormats.td"
-include "HexagonImmediates.td"
+include "HexagonOperands.td"
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -26,6 +26,13 @@ class PredNewRel: PredRel;
// ImmRegRel - Filter class used to relate instructions having reg-reg form
// with their reg-imm counterparts.
class ImmRegRel;
+// NewValueRel - Filter class used to relate regular store instructions with
+// their new-value store form.
+class NewValueRel: PredNewRel;
+// AddrModeRel - Filter class used to relate load/store instructions having
+// different addressing modes with each other.
+class AddrModeRel: NewValueRel;
+
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -818,8 +825,6 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
// LD +
//===----------------------------------------------------------------------===//
///
-/// Make sure that in post increment load, the first operand is always the post
-/// increment operand.
///
// Load doubleword.
let isPredicable = 1 in
@@ -850,12 +855,65 @@ def LDd_GP : LDInst2<(outs DoubleRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrid : LDInst2PI<(outs DoubleRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memd($src1++#$offset)",
+//===----------------------------------------------------------------------===//
+// Post increment load
+// Make sure that in post increment load, the first operand is always the post
+// increment operand.
+//===----------------------------------------------------------------------===//
+
+multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
+ bit isNot, bit isPredNew> {
+ let PNewValue = #!if(isPredNew, "new", "") in
+ def #NAME# : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
+ #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+ ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
[],
- "$src1 = $dst2">;
+ "$src2 = $dst2">;
+}
+
+multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
+ Operand ImmOp, bit PredNot> {
+ let PredSense = #!if(PredNot, "false", "true") in {
+ defm _c#NAME# : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
+ // Predicate new
+ let Predicates = [HasV4T], validSubTargets = HasV4SubT in
+ defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>;
+ }
+}
+
+multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
+ Operand ImmOp> {
+
+ let BaseOpcode = "POST_"#BaseOp in {
+ let isPredicable = 1 in
+ def #NAME# : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, ImmOp:$offset),
+ "$dst = "#mnemonic#"($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+ let isPredicated = 1 in {
+ defm Pt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 0 >;
+ defm NotPt : LD_PostInc_Pred<mnemonic, RC, ImmOp, 1 >;
+ }
+ }
+}
+
+let hasCtrlDep = 1, neverHasSideEffects = 1 in {
+ defm POST_LDrib : LD_PostInc<"memb", "LDrib", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDriub : LD_PostInc<"memub", "LDriub", IntRegs, s4_0Imm>,
+ PredNewRel;
+ defm POST_LDrih : LD_PostInc<"memh", "LDrih", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriuh : LD_PostInc<"memuh", "LDriuh", IntRegs, s4_1Imm>,
+ PredNewRel;
+ defm POST_LDriw : LD_PostInc<"memw", "LDriw", IntRegs, s4_2Imm>,
+ PredNewRel;
+ defm POST_LDrid : LD_PostInc<"memd", "LDrid", DoubleRegs, s4_3Imm>,
+ PredNewRel;
+}
// Load doubleword conditionally.
let neverHasSideEffects = 1, isPredicated = 1 in
@@ -883,20 +941,6 @@ def LDrid_indexed_cNotPt : LDInst2<(outs DoubleRegs:$dst),
"if (!$src1) $dst = memd($src2+#$src3)",
[]>;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if ($src1) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cNotPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if (!$src1) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
let neverHasSideEffects = 1, isPredicated = 1 in
def LDrid_cdnPt : LDInst2<(outs DoubleRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
@@ -968,13 +1012,6 @@ def LDub_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrib : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memb($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
// Load byte conditionally.
let neverHasSideEffects = 1, isPredicated = 1 in
def LDrib_cPt : LDInst2<(outs IntRegs:$dst),
@@ -1000,20 +1037,6 @@ def LDrib_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
"if (!$src1) $dst = memb($src2+#$src3)",
[]>;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
let neverHasSideEffects = 1, isPredicated = 1 in
def LDrib_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
@@ -1082,13 +1105,6 @@ def LDuh_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDrih : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memh($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
// Load halfword conditionally.
let neverHasSideEffects = 1, isPredicated = 1 in
def LDrih_cPt : LDInst2<(outs IntRegs:$dst),
@@ -1114,20 +1130,6 @@ def LDrih_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
"if (!$src1) $dst = memh($src2+#$src3)",
[]>;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
let neverHasSideEffects = 1, isPredicated = 1 in
def LDrih_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
@@ -1181,13 +1183,6 @@ def LDriub_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriub : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memub($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
// Load unsigned byte conditionally.
let neverHasSideEffects = 1, isPredicated = 1 in
def LDriub_cPt : LDInst2<(outs IntRegs:$dst),
@@ -1213,20 +1208,6 @@ def LDriub_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
"if (!$src1) $dst = memub($src2+#$src3)",
[]>;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
let neverHasSideEffects = 1, isPredicated = 1 in
def LDriub_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
@@ -1274,13 +1255,6 @@ def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriuh : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memuh($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
// Load unsigned halfword conditionally.
let neverHasSideEffects = 1, isPredicated = 1 in
def LDriuh_cPt : LDInst2<(outs IntRegs:$dst),
@@ -1306,20 +1280,6 @@ def LDriuh_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
"if (!$src1) $dst = memuh($src2+#$src3)",
[]>;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
let neverHasSideEffects = 1, isPredicated = 1 in
def LDriuh_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
@@ -1380,13 +1340,6 @@ def LDw_GP : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[NoV4T]>;
-let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
-def POST_LDriw : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, s4Imm:$offset),
- "$dst = memw($src1++#$offset)",
- [],
- "$src1 = $dst2">;
-
// Load word conditionally.
let neverHasSideEffects = 1, isPredicated = 1 in
@@ -1413,20 +1366,6 @@ def LDriw_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
"if (!$src1) $dst = memw($src2+#$src3)",
[]>;
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if ($src1) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if (!$src1) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">;
-
let neverHasSideEffects = 1, isPredicated = 1 in
def LDriw_cdnPt : LDInst2<(outs IntRegs:$dst),
(ins PredRegs:$src1, MEMri:$addr),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 70448fc7af..b40fc418c1 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -1002,108 +1002,6 @@ def LDriw_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
[]>,
Requires<[HasV4T]>;
-// Rd=memw(Rt<<#u2+#U6)
-
-
-// Post-inc Load, Predicated, Dot new
-
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if ($src1.new) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrid_cdnNotPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
- "if (!$src1.new) $dst1 = memd($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1.new) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrib_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1.new) $dst1 = memb($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1.new) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDrih_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1.new) $dst1 = memh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if ($src1.new) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriub_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
- "if (!$src1.new) $dst1 = memub($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if ($src1.new) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriuh_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
- "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if ($src1.new) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
-let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
-def POST_LDriw_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
- "if (!$src1.new) $dst1 = memw($src2++#$src3)",
- [],
- "$src2 = $dst2">,
- Requires<[HasV4T]>;
-
/// Load from global offset
let isPredicable = 1, neverHasSideEffects = 1 in
@@ -4519,18 +4417,6 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
//===----------------------------------------------------------------------===//
-// MEMw_ADDSUBi_indexed_V4:
-// pseudo operation for MEMw_ADDi_indexed_V4 and
-// MEMw_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
// memw(Rs+#u6:2) += #U5
let AddedComplexity = 30 in
@@ -4588,17 +4474,6 @@ def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
(add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
-// MEMw_ADDSUBi_V4:
-// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
- ADDRriU6_2:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
// memw(Rs+#u6:2) += #U5
let AddedComplexity = 30 in
def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
@@ -4676,20 +4551,6 @@ def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
//===----------------------------------------------------------------------===//
-// MEMh_ADDSUBi_indexed_V4:
-// Pseudo operation for MEMh_ADDi_indexed_V4 and
-// MEMh_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
- u6_1ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
// memh(Rs+#u6:1) += #U5
let AddedComplexity = 30 in
def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
@@ -4750,17 +4611,6 @@ def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
(add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
-// MEMh_ADDSUBi_V4:
-// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
- m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
// memh(Rs+#u6:1) += #U5
let AddedComplexity = 30 in
def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
@@ -4838,21 +4688,6 @@ def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
//===----------------------------------------------------------------------===//
-
-// MEMb_ADDSUBi_indexed_V4:
-// Pseudo operation for MEMb_ADDi_indexed_V4 and
-// MEMb_SUBi_indexed_V4 a later pass will change it
-// to the corresponding pattern.
-let AddedComplexity = 30 in
-def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
- (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
- u6_0ImmPred:$offset)),
- m6ImmPred:$addend),
- (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
- Requires<[HasV4T, UseMEMOP]>;
-
// memb(Rs+#u6:0) += #U5
let AddedComplexity = 30 in
def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
@@ -4913,17 +4748,6 @@ def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
(add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
Requires<[HasV4T, UseMEMOP]>;
-// MEMb_ADDSUBi_V4:
-// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4
-// a later pass will change it to the right pattern.
-let AddedComplexity = 30 in
-def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
- (ins MEMri:$addr, m6Imm:$addend),
- "Error; should not emit",
- [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
- m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
- Requires<[HasV4T, UseMEMOP]>;
-
// memb(Rs+#u6:0) += #U5
let AddedComplexity = 30 in
def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonOperands.td
index 18692c4dcc..3f43d697fc 100644
--- a/lib/Target/Hexagon/HexagonImmediates.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -1,4 +1,4 @@
-//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
+//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,221 +7,65 @@
//
//===----------------------------------------------------------------------===//
-// From IA64's InstrInfo file
-def s32Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s16Imm : Operand<i32> {
- let PrintMethod = "printImmOperand";
-}
-
-def s12Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s11_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s10Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s9Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s8Imm64 : Operand<i64> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def s4_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u64Imm : Operand<i64> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u32Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u16_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u11_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u10Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u9Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u7Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_0Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u6_3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u5Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u4Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u3Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u2Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def u1Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def n8Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def m6Imm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printImmOperand";
-}
-
-def nOneImm : Operand<i32> {
- // For now, we use a generic print function for all operands.
- let PrintMethod = "printNOneImmOperand";
-}
+// Immediate operands.
+
+let PrintMethod = "printImmOperand" in {
+ // f32Ext type is used to identify constant extended floating point immediates.
+ def f32Ext : Operand<f32>;
+ def s32Imm : Operand<i32>;
+ def s26_6Imm : Operand<i32>;
+ def s16Imm : Operand<i32>;
+ def s12Imm : Operand<i32>;
+ def s11Imm : Operand<i32>;
+ def s11_0Imm : Operand<i32>;
+ def s11_1Imm : Operand<i32>;
+ def s11_2Imm : Operand<i32>;
+ def s11_3Imm : Operand<i32>;
+ def s10Imm : Operand<i32>;
+ def s9Imm : Operand<i32>;
+ def m9Imm : Operand<i32>;
+ def s8Imm : Operand<i32>;
+ def s8Imm64 : Operand<i64>;
+ def s6Imm : Operand<i32>;
+ def s4Imm : Operand<i32>;
+ def s4_0Imm : Operand<i32>;
+ def s4_1Imm : Operand<i32>;
+ def s4_2Imm : Operand<i32>;
+ def s4_3Imm : Operand<i32>;
+ def u64Imm : Operand<i64>;
+ def u32Imm : Operand<i32>;
+ def u26_6Imm : Operand<i32>;
+ def u16Imm : Operand<i32>;
+ def u16_0Imm : Operand<i32>;
+ def u16_1Imm : Operand<i32>;
+ def u16_2Imm : Operand<i32>;
+ def u11_3Imm : Operand<i32>;
+ def u10Imm : Operand<i32>;
+ def u9Imm : Operand<i32>;
+ def u8Imm : Operand<i32>;
+ def u7Imm : Operand<i32>;
+ def u6Imm : Operand<i32>;
+ def u6_0Imm : Operand<i32>;
+ def u6_1Imm : Operand<i32>;
+ def u6_2Imm : Operand<i32>;
+ def u6_3Imm : Operand<i32>;
+ def u5Imm : Operand<i32>;
+ def u4Imm : Operand<i32>;
+ def u3Imm : Operand<i32>;
+ def u2Imm : Operand<i32>;
+ def u1Imm : Operand<i32>;
+ def n8Imm : Operand<i32>;
+ def m6Imm : Operand<i32>;
+}
+
+let PrintMethod = "printNOneImmOperand" in
+def nOneImm : Operand<i32>;
//
// Immediate predicates
//
def s32ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
// field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
@@ -241,8 +85,16 @@ def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
return isShiftedInt<24,16>(v);
}]>;
+def s26_6ImmPred : PatLeaf<(i32 imm), [{
+ // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
+ // sign extended field and is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<26,6>(v);
+}]>;
+
+
def s16ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
// field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<16>(v);
@@ -250,7 +102,7 @@ def s16ImmPred : PatLeaf<(i32 imm), [{
def s13ImmPred : PatLeaf<(i32 imm), [{
- // immS13 predicate - True if the immediate fits in a 13-bit sign extended
+ // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
// field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<13>(v);
@@ -258,39 +110,39 @@ def s13ImmPred : PatLeaf<(i32 imm), [{
def s12ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
+ // s12ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<12>(v);
}]>;
def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
+ // s11_0ImmPred predicate - True if the immediate fits in an 11-bit
+ // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
}]>;
def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
+ // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
+ // sign extended field and is a multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,1>(v);
}]>;
def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
+ // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
+ // sign extended field and is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,2>(v);
}]>;
def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
+ // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
+ // sign extended field and is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,3>(v);
}]>;
@@ -311,6 +163,12 @@ def s9ImmPred : PatLeaf<(i32 imm), [{
return isInt<9>(v);
}]>;
+def m9ImmPred : PatLeaf<(i32 imm), [{
+ // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
+ // field. The range of m9 is -255 to 255.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v) && (v != -256);
+}]>;
def s8ImmPred : PatLeaf<(i32 imm), [{
// s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
@@ -369,19 +227,23 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{
def u64ImmPred : PatLeaf<(i64 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
// Adding "N ||" to suppress gcc unused warning.
return (N || true);
}]>;
def u32ImmPred : PatLeaf<(i32 imm), [{
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<32>(v);
}]>;
+def u26_6ImmPred : PatLeaf<(i32 imm), [{
+ // u26_6ImmPred predicate - True if the immediate fits in a 32-bit
+ // unsigned field and is a multiple of 64.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<26,6>(v);
+}]>;
+
def u16ImmPred : PatLeaf<(i32 imm), [{
// u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
// field.
@@ -411,8 +273,14 @@ def u8ImmPred : PatLeaf<(i32 imm), [{
return isUInt<8>(v);
}]>;
+def u7StrictPosImmPred : ImmLeaf<i32, [{
+ // u7StrictPosImmPred predicate - True if the immediate fits in a 7-bit
+ // unsigned field and is strictly greater than 0.
+ return isUInt<7>(Imm) && Imm > 0;
+}]>;
+
def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 8-bit unsigned
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
// field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<7>(v);
@@ -434,21 +302,21 @@ def u6_0ImmPred : PatLeaf<(i32 imm), [{
}]>;
def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
// field that is 1 bit alinged - multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,1>(v);
}]>;
def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned
// field that is 2 bits alinged - multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,2>(v);
}]>;
def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
// field that is 3 bits alinged - multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,3>(v);
@@ -485,11 +353,27 @@ def u1ImmPred : PatLeaf<(i1 imm), [{
return isUInt<1>(v);
}]>;
-def m6ImmPred : PatLeaf<(i32 imm), [{
- // m6ImmPred predicate - True if the immediate is negative and fits in
- // a 6-bit negative number.
+def m5BImmPred : PatLeaf<(i32 imm), [{
+ // m5BImmPred predicate - True if the number, truncated to 8 bits (char), is
+ // in range -31 .. -1 and will fit in a 5 bit field when made positive (for
+ // memops). Specific to the zero extending of a negative by CombineInstr.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5HImmPred : PatLeaf<(i32 imm), [{
+ // m5HImmPred predicate - True if the number, truncated to 16 bits (short), is
+ // in range -31 .. -1 and will fit in a 5 bit field when made positive (for
+ // memops). Specific to the zero extending of a negative by CombineInstr.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (-31 <= v && v <= -1);
+}]>;
+
+def m5ImmPred : PatLeaf<(i32 imm), [{
+ // m5ImmPred predicate - True if the number is in range -1 .. -31
+ // and will fit in a 5 bit field when made positive, for use in memops.
int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
+ return (-31 <= v && v <= -1);
}]>;
//InN means negative integers in [-(2^N - 1), 0]
@@ -506,3 +390,78 @@ def nOneImmPred : PatLeaf<(i32 imm), [{
return (-1 == v);
}]>;
+def Set5ImmPred : PatLeaf<(i32 imm), [{
+ // Set5ImmPred predicate - True if the number is in the series of values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in setbit immediate.
+ uint32_t v = (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr5ImmPred : PatLeaf<(i32 imm), [{
+ // Clr5ImmPred predicate - True if the number is in the series of
+ // bit negated values.
+ // [ 2^0, 2^1, ... 2^31 ]
+ // For use in clrbit immediate.
+ // Note: we are bit NOTing the value.
+ uint32_t v = ~ (int32_t)N->getSExtValue();
+ // Constrain to 32 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr5ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+ int32_t v = (int32_t)N->getSExtValue();
+ return (v >= 0 && v <= 31);
+}]>;
+
+def Set4ImmPred : PatLeaf<(i32 imm), [{
+ // Set4ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit immediate.
+ uint16_t v = (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr4ImmPred : PatLeaf<(i32 imm), [{
+ // Clr4ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^15 ].
+ // For use in setbit and clrbit immediate.
+ uint16_t v = ~ (int16_t)N->getSExtValue();
+ // Constrain to 16 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr4ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+ int16_t v = (int16_t)N->getSExtValue();
+ return (v >= 0 && v <= 15);
+}]>;
+
+def Set3ImmPred : PatLeaf<(i32 imm), [{
+ // Set3ImmPred predicate - True if the number is in the series of values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit immediate.
+ uint8_t v = (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def Clr3ImmPred : PatLeaf<(i32 imm), [{
+ // Clr3ImmPred predicate - True if the number is in the series of
+ // bit negated values:
+ // [ 2^0, 2^1, ... 2^7 ].
+ // For use in setbit and clrbit immediate.
+ uint8_t v = ~ (int8_t)N->getSExtValue();
+ // Constrain to 8 bits, and then check for single bit.
+ return ImmIsSingleBit(v);
+}]>;
+
+def SetClr3ImmPred : PatLeaf<(i32 imm), [{
+ // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+ int8_t v = (int8_t)N->getSExtValue();
+ return (v >= 0 && v <= 7);
+}]>;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 7221e90634..9fc826f412 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -43,7 +43,27 @@ namespace HexagonII {
TypeMARKER = 31 // Such as end of a HW loop.
};
+ enum SubTarget { // NOTE(review): values look like a V2..V5 bitmask (bit 0 = V2, bit 3 = V5) - confirm
+ HasV2SubT = 0xf,
+ HasV2SubTOnly = 0x1,
+ NoV2SubT = 0x0,
+ HasV3SubT = 0xe,
+ HasV3SubTOnly = 0x2,
+ NoV3SubT = 0x1,
+ HasV4SubT = 0xc,
+ NoV4SubT = 0x3,
+ HasV5SubT = 0x8,
+ NoV5SubT = 0x7
+ };
+ enum AddrMode {
+ NoAddrMode = 0, // No addressing mode
+ Absolute = 1, // Absolute addressing mode
+ AbsoluteSet = 2, // Absolute set addressing mode
+ BaseImmOffset = 3, // Indirect with offset
+ BaseLongOffset = 4, // Indirect with long offset
+ BaseRegOffset = 5 // Indirect with register offset
+ };
// MCInstrDesc TSFlags
// *** Must match HexagonInstrFormat*.td ***
@@ -58,8 +78,47 @@ namespace HexagonII {
// Predicated instructions.
PredicatedPos = 6,
- PredicatedMask = 0x1
- };
+ PredicatedMask = 0x1,
+ PredicatedNewPos = 7,
+ PredicatedNewMask = 0x1,
+
+ // Stores that can be newified.
+ mayNVStorePos = 8,
+ mayNVStoreMask = 0x1,
+
+ // Dot new value store instructions.
+ NVStorePos = 9,
+ NVStoreMask = 0x1,
+
+ // Extendable insns.
+ ExtendablePos = 10,
+ ExtendableMask = 0x1,
+
+ // Insns must be extended.
+ ExtendedPos = 11,
+ ExtendedMask = 0x1,
+
+ // Which operand may be extended.
+ ExtendableOpPos = 12,
+ ExtendableOpMask = 0x7,
+
+ // Signed or unsigned range.
+ ExtentSignedPos = 15,
+ ExtentSignedMask = 0x1,
+
+ // Number of bits of range before extending operand.
+ ExtentBitsPos = 16,
+ ExtentBitsMask = 0x1f,
+
+ // Valid subtargets
+ validSubTargetPos = 21,
+ validSubTargetMask = 0xf,
+
+ // Addressing mode for load/store instructions
+ AddrModePos = 25,
+ AddrModeMask = 0xf
+
+ };
// *** The code above must match HexagonInstrFormat*.td *** //
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 8995080974..eb6c779f45 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
+subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index ad27cc9122..b448cc4ed9 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -24,6 +24,9 @@ def RetCC_MSP430 : CallingConv<[
// MSP430 Argument Calling Conventions
//===----------------------------------------------------------------------===//
def CC_MSP430 : CallingConv<[
+ // Pass by value if the byval attribute is given
+ CCIfByVal<CCPassByVal<2, 2>>,
+
// Promote i8 arguments to i16.
CCIfType<[i8], CCPromoteToType<i16>>,
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fc677aec38..c79e5f181b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -164,6 +164,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
setOperationAction(ISD::SREM, MVT::i16, Expand);
+ // varargs support
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
// Libcalls names.
if (HWMultMode == HWMultIntr) {
setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
@@ -192,6 +198,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -297,7 +304,6 @@ MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// LowerCCCArguments - transform physical registers into virtual registers and
/// generate load operations for arguments places on the stack.
// FIXME: struct return stuff
-// FIXME: varargs
SDValue
MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -311,6 +317,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -318,7 +325,11 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
- assert(!isVarArg && "Varargs not supported yet");
+ // Create frame index for the start of the first vararg value
+ if (isVarArg) {
+ unsigned Offset = CCInfo.getNextStackOffset();
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, Offset, true));
+ }
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -357,22 +368,34 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
} else {
// Sanity check
assert(VA.isMemLoc());
- // Load the argument to a virtual register
- unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
- if (ObjSize > 2) {
- errs() << "LowerFormalArguments Unhandled argument type: "
- << EVT(VA.getLocVT()).getEVTString()
- << "\n";
+
+ SDValue InVal;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ if (Flags.isByVal()) {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), true);
+ InVal = DAG.getFrameIndex(FI, getPointerTy());
+ } else {
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ errs() << "LowerFormalArguments Unhandled argument type: "
+ << EVT(VA.getLocVT()).getEVTString()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ //from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
}
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
-
- // Create the SelectionDAG nodes corresponding to a load
- //from this parameter
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+
+ InVals.push_back(InVal);
}
}
@@ -498,9 +521,23 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
StackPtr,
DAG.getIntPtrConstant(VA.getLocMemOffset()));
+ SDValue MemOp;
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i16);
+ MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile*/false,
+ /*AlwaysInline=*/true,
+ MachinePointerInfo(),
+ MachinePointerInfo());
+ } else {
+ MemOp = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(),
+ false, false, 0);
+ }
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false, 0));
+ MemOpChains.push_back(MemOp);
}
}
@@ -931,6 +968,22 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+SDValue MSP430TargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+
+ // Frame index of the first vararg argument, as a pointer-typed node
+ SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ getPointerTy());
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store the frame index to the address in operand 1, chained on operand 0
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), FrameIndex,
+ Op.getOperand(1), MachinePointerInfo(SV),
+ false, false, 0);
+}
+
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 991304c23d..bf021eaac5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -92,6 +92,7 @@ namespace llvm {
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
TargetLowering::ConstraintType
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 632d6dee27..d1697f478c 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -30,6 +30,9 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// ReturnAddrIndex - FrameIndex for return slot.
int ReturnAddrIndex;
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
@@ -41,6 +44,9 @@ public:
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex;}
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
};
} // End llvm namespace
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index b38463de4b..68d3ac5f3b 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -128,6 +128,10 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
case MCSymbolRefExpr::VK_Mips_HIGHER: OS << "%higher("; break;
case MCSymbolRefExpr::VK_Mips_HIGHEST: OS << "%highest("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_HI16: OS << "%got_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_LO16: OS << "%got_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_CALL_HI16: OS << "%call_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_CALL_LO16: OS << "%call_lo("; break;
}
OS << SRE->getSymbol();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 9a94c75e2f..2e5092c13c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -43,6 +43,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Mips::fixup_Mips_GOT_PAGE:
case Mips::fixup_Mips_GOT_OFST:
case Mips::fixup_Mips_GOT_DISP:
+ case Mips::fixup_Mips_GOT_LO16:
+ case Mips::fixup_Mips_CALL_LO16:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -61,6 +63,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
break;
case Mips::fixup_Mips_HI16:
case Mips::fixup_Mips_GOT_Local:
+ case Mips::fixup_Mips_GOT_HI16:
+ case Mips::fixup_Mips_CALL_HI16:
// Get the 2nd 16-bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
@@ -180,7 +184,11 @@ public:
{ "fixup_Mips_GOT_OFST", 0, 16, 0 },
{ "fixup_Mips_GOT_DISP", 0, 16, 0 },
{ "fixup_Mips_HIGHER", 0, 16, 0 },
- { "fixup_Mips_HIGHEST", 0, 16, 0 }
+ { "fixup_Mips_HIGHEST", 0, 16, 0 },
+ { "fixup_Mips_GOT_HI16", 0, 16, 0 },
+ { "fixup_Mips_GOT_LO16", 0, 16, 0 },
+ { "fixup_Mips_CALL_HI16", 0, 16, 0 },
+ { "fixup_Mips_CALL_LO16", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 233214b461..94e0d20d88 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -84,7 +84,13 @@ namespace MipsII {
/// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a
/// 64-bit symbol address.
MO_HIGHER,
- MO_HIGHEST
+ MO_HIGHEST,
+
+ /// MO_GOT_HI16/LO16, MO_CALL_HI16/LO16 - Relocations used for large GOTs.
+ MO_GOT_HI16,
+ MO_GOT_LO16,
+ MO_CALL_HI16,
+ MO_CALL_LO16
};
enum {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 6ad8669d04..3fe30b0b49 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -190,6 +190,18 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_HIGHEST:
Type = ELF::R_MIPS_HIGHEST;
break;
+ case Mips::fixup_Mips_GOT_HI16:
+ Type = ELF::R_MIPS_GOT_HI16;
+ break;
+ case Mips::fixup_Mips_GOT_LO16:
+ Type = ELF::R_MIPS_GOT_LO16;
+ break;
+ case Mips::fixup_Mips_CALL_HI16:
+ Type = ELF::R_MIPS_CALL_HI16;
+ break;
+ case Mips::fixup_Mips_CALL_LO16:
+ Type = ELF::R_MIPS_CALL_LO16;
+ break;
}
return Type;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 77faec54fb..f96390043a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -116,6 +116,18 @@ namespace Mips {
// resulting in - R_MIPS_HIGHEST
fixup_Mips_HIGHEST,
+ // resulting in - R_MIPS_GOT_HI16
+ fixup_Mips_GOT_HI16,
+
+ // resulting in - R_MIPS_GOT_LO16
+ fixup_Mips_GOT_LO16,
+
+ // resulting in - R_MIPS_CALL_HI16
+ fixup_Mips_CALL_HI16,
+
+ // resulting in - R_MIPS_CALL_LO16
+ fixup_Mips_CALL_LO16,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 7fbdae02f4..da1e4552c9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -287,6 +287,18 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_HIGHEST:
FixupKind = Mips::fixup_Mips_HIGHEST;
break;
+ case MCSymbolRefExpr::VK_Mips_GOT_HI16:
+ FixupKind = Mips::fixup_Mips_GOT_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_LO16:
+ FixupKind = Mips::fixup_Mips_GOT_LO16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_CALL_HI16:
+ FixupKind = Mips::fixup_Mips_CALL_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_CALL_LO16:
+ FixupKind = Mips::fixup_Mips_CALL_LO16;
+ break;
} // switch
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 90c01d5de0..31194ae610 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -60,16 +60,19 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
"Enable 'byte/half swap' instructions.">;
def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
"Enable 'count leading bits' instructions.">;
+def FeatureFPIdx : SubtargetFeature<"FPIdx", "HasFPIdx", "true",
+ "Enable 'FP indexed load/store' instructions.">;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
"Mips32 ISA Support",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
- [FeatureMips32, FeatureSEInReg, FeatureSwap]>;
+ [FeatureMips32, FeatureSEInReg, FeatureSwap,
+ FeatureFPIdx]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
[FeatureGP64Bit, FeatureFP64Bit,
- FeatureMips32]>;
+ FeatureMips32, FeatureFPIdx]>;
def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
"Mips64r2", "Mips64r2 ISA Support",
[FeatureMips64, FeatureMips32r2]>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index a6111689c7..83322eac8c 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -255,6 +255,7 @@ def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsHi texternalsym:$in), (LUi64 texternalsym:$in)>;
def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
@@ -262,6 +263,7 @@ def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
def : MipsPat<(MipsLo tglobaltlsaddr:$in),
(DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsLo texternalsym:$in), (DADDiu ZERO_64, texternalsym:$in)>;
def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
(DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index d014ba1792..e71c9327f3 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -243,31 +243,48 @@ bool Filler::delayHasHazard(InstrIter candidate,
return false;
}
+// Helper: add MO's register to RegDefs if MO is a def, or to RegUses if it is
+// a use. Non-register operands, register 0 and ExcludedReg are ignored.
+static void insertDefUse(const MachineOperand &MO,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses,
+ unsigned ExcludedReg = 0) {
+ unsigned Reg;
+
+ if (!MO.isReg() || !(Reg = MO.getReg()) || (Reg == ExcludedReg))
+ return;
+
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ else if (MO.isUse())
+ RegUses.insert(Reg);
+}
+
// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
void Filler::insertDefsUses(InstrIter MI,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses) {
- // If MI is a call or return, just examine the explicit non-variadic operands.
- MCInstrDesc MCID = MI->getDesc();
- unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
+ unsigned I, E = MI->getDesc().getNumOperands();
- // Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MI->isCall())
- RegDefs.insert(Mips::RA);
+ for (I = 0; I != E; ++I)
+ insertDefUse(MI->getOperand(I), RegDefs, RegUses);
- for (unsigned i = 0; i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- unsigned Reg;
+ // If MI is a call, add RA to RegDefs to prevent users of RA from going into
+ // delay slot.
+ if (MI->isCall()) {
+ RegDefs.insert(Mips::RA);
+ return;
+ }
- if (!MO.isReg() || !(Reg = MO.getReg()))
- continue;
+ // Return if MI is a return.
+ if (MI->isReturn())
+ return;
- if (MO.isDef())
- RegDefs.insert(Reg);
- else if (MO.isUse())
- RegUses.insert(Reg);
- }
+ // Examine the implicit operands. Exclude register AT which is in the list of
+ // clobbered registers of branch instructions.
+ E = MI->getNumOperands();
+ for (; I != E; ++I)
+ insertDefUse(MI->getOperand(I), RegDefs, RegUses, Mips::AT);
}
//returns true if the Reg or its alias is in the RegSet.
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 778fe34275..c925ab437f 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -390,7 +390,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
if (LS &&
(LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasMips32r2Or64() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/)
+ Subtarget.hasFPIdx() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/)
return false;
}
@@ -500,7 +500,7 @@ bool MipsDAGToDAGISel::SelectAddr16(
if (LS &&
(LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasMips32r2Or64())
+ Subtarget.hasFPIdx())
return false;
}
Base = Addr;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 32cf6c8be7..2e7b144165 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -46,6 +46,10 @@ static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
cl::desc("MIPS: Enable tail calls."), cl::init(false));
+static cl::opt<bool>
+LargeGOT("mxgot", cl::Hidden,
+ cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
+
static const uint16_t O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -77,6 +81,71 @@ static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
+static SDValue getTargetNode(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+ EVT Ty = Op.getValueType();
+
+ if (GlobalAddressSDNode *N = dyn_cast<GlobalAddressSDNode>(Op))
+ return DAG.getTargetGlobalAddress(N->getGlobal(), Op.getDebugLoc(), Ty, 0,
+ Flag);
+ if (ExternalSymbolSDNode *N = dyn_cast<ExternalSymbolSDNode>(Op))
+ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
+ if (BlockAddressSDNode *N = dyn_cast<BlockAddressSDNode>(Op))
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
+ if (JumpTableSDNode *N = dyn_cast<JumpTableSDNode>(Op))
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
+ if (ConstantPoolSDNode *N = dyn_cast<ConstantPoolSDNode>(Op))
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ N->getOffset(), Flag);
+
+ llvm_unreachable("Unexpected node type.");
+ return SDValue();
+}
+
+static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ SDValue Hi = getTargetNode(Op, DAG, MipsII::MO_ABS_HI);
+ SDValue Lo = getTargetNode(Op, DAG, MipsII::MO_ABS_LO);
+ return DAG.getNode(ISD::ADD, DL, Ty,
+ DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
+ DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+}
+
+static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ getTargetNode(Op, DAG, GOTFlag));
+ SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
+ MachinePointerInfo::getGOT(), false, false, false,
+ 0);
+ unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(Op, DAG, LoFlag));
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
+}
+
+static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty),
+ getTargetNode(Op, DAG, Flag));
+ return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+}
+
+static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT Ty = Op.getValueType();
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, GetGlobalReg(DAG, Ty));
+ SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
+ getTargetNode(Op, DAG, LoFlag));
+ return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+}
+
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MipsISD::JmpLink: return "MipsISD::JmpLink";
@@ -1754,8 +1823,6 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
- SDVTList VTs = DAG.getVTList(MVT::i32);
-
const MipsTargetObjectFile &TLOF =
(const MipsTargetObjectFile&)getObjFileLowering();
@@ -1763,69 +1830,33 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GPREL);
- SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl,
+ DAG.getVTList(MVT::i32), &GA, 1);
SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode);
}
+
// %hi/%lo relocation
- SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_ABS_HI);
- SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_ABS_LO);
- SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
- }
-
- EVT ValTy = Op.getValueType();
- bool HasGotOfst = (GV->hasInternalLinkage() ||
- (GV->hasLocalLinkage() && !isa<Function>(GV)));
- unsigned GotFlag = HasMips64 ?
- (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
- (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16);
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
- GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), GA);
- SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA,
- MachinePointerInfo(), false, false, false, 0);
- // On functions and global targets not internal linked only
- // a load from got/GP is necessary for PIC to work.
- if (!HasGotOfst)
- return ResNode;
- SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0,
- HasMips64 ? MipsII::MO_GOT_OFST :
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo);
- return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo);
+ return getAddrNonPIC(Op, DAG);
+ }
+
+ if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
+ return getAddrLocal(Op, DAG, HasMips64);
+
+ if (LargeGOT)
+ return getAddrGlobalLargeGOT(Op, DAG, MipsII::MO_GOT_HI16,
+ MipsII::MO_GOT_LO16);
+
+ return getAddrGlobal(Op, DAG,
+ HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
}
SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- // FIXME there isn't actually debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
- // %hi/%lo relocation
- SDValue BAHi =
- DAG.getTargetBlockAddress(BA, MVT::i32, 0, MipsII::MO_ABS_HI);
- SDValue BALo =
- DAG.getTargetBlockAddress(BA, MVT::i32, 0, MipsII::MO_ABS_LO);
- SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
- }
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
+ return getAddrNonPIC(Op, DAG);
- EVT ValTy = Op.getValueType();
- unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
- SDValue BAGOTOffset = DAG.getTargetBlockAddress(BA, ValTy, 0, GOTFlag);
- BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy,
- GetGlobalReg(DAG, ValTy), BAGOTOffset);
- SDValue BALOOffset = DAG.getTargetBlockAddress(BA, ValTy, 0, OFSTFlag);
- SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset,
- MachinePointerInfo(), false, false, false, 0);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset);
- return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
+ return getAddrLocal(Op, DAG, HasMips64);
}
// @LOCALMOD-BEGIN
@@ -1962,41 +1993,15 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
- SDValue HiPart, JTI, JTILo;
- // FIXME there isn't actually debug info here
- DebugLoc dl = Op.getDebugLoc();
- bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
+ return getAddrNonPIC(Op, DAG);
- if (!IsPIC && !IsN64) {
- JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI);
- HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI);
- JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO);
- } else {// Emit Load from Global Pointer
- unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- unsigned OfstFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
- JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag);
- JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
- JTI);
- HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI,
- MachinePointerInfo(), false, false, false, 0);
- JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag);
- }
-
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo);
- return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo);
+ return getAddrLocal(Op, DAG, HasMips64);
}
SDValue MipsTargetLowering::
LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
- SDValue ResNode;
- ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
- const Constant *C = N->getConstVal();
- // FIXME there isn't actually debug info here
- DebugLoc dl = Op.getDebugLoc();
-
// gp_rel relocation
// FIXME: we should reference the constant pool using small data sections,
// but the asm printer currently doesn't support this feature without
@@ -2007,31 +2012,10 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
- SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_HI);
- SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_LO);
- SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
- } else {
- EVT ValTy = Op.getValueType();
- unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
- SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
- N->getOffset(), GOTFlag);
- CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), CP);
- SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(), false,
- false, false, 0);
- SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
- N->getOffset(), OFSTFlag);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo);
- ResNode = DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
- }
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
+ return getAddrNonPIC(Op, DAG);
- return ResNode;
+ return getAddrLocal(Op, DAG, HasMips64);
}
SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -2923,60 +2907,41 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- unsigned char OpFlag;
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
bool GlobalOrExternal = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- if (IsPICCall && G->getGlobal()->hasInternalLinkage()) {
- OpFlag = IsO32 ? MipsII::MO_GOT : MipsII::MO_GOT_PAGE;
- unsigned char LoFlag = IsO32 ? MipsII::MO_ABS_LO : MipsII::MO_GOT_OFST;
+ if (IsPICCall) {
+ if (G->getGlobal()->hasInternalLinkage())
+ Callee = getAddrLocal(Callee, DAG, HasMips64);
+ else if (LargeGOT)
+ Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16);
+ else
+ Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+ } else
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0,
- OpFlag);
- CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
- 0, LoFlag);
- } else {
- OpFlag = IsPICCall ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- getPointerTy(), 0, OpFlag);
- }
-
+ MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- if (IsN64 || (!IsO32 && IsPIC))
- OpFlag = MipsII::MO_GOT_DISP;
- else if (!IsPIC) // !N64 && static
- OpFlag = MipsII::MO_NO_FLAG;
+ if (!IsN64 && !IsPIC) // !N64 && static
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ MipsII::MO_NO_FLAG);
+ else if (LargeGOT)
+ Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16);
+ else if (HasMips64)
+ Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_DISP);
else // O32 & PIC
- OpFlag = MipsII::MO_GOT_CALL;
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
- OpFlag);
+ Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+
GlobalOrExternal = true;
}
SDValue InFlag;
- // Create nodes that load address of callee and copy it to T9
- if (IsPICCall) {
- if (GlobalOrExternal) {
- // Load callee address
- Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(),
- GetGlobalReg(DAG, getPointerTy()), Callee);
- SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- Callee, MachinePointerInfo::getGOT(),
- false, false, false, 0);
-
- // Use GOT+LO if callee has internal linkage.
- if (CalleeLo.getNode()) {
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, getPointerTy(), CalleeLo);
- Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo);
- } else
- Callee = LoadValue;
- }
- }
-
// T9 register operand.
SDValue T9;
@@ -3544,6 +3509,26 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
bool
+MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (!AM.HasBaseReg) // allow "r+i".
+ break;
+ return false; // disallow "r+r" or "r+r+i".
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool
MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Mips target isn't yet aware of offsets.
return false;
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 77045c3162..0db15cf53f 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -362,6 +362,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 7844df9f40..3d6c1ca43f 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -282,7 +282,7 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
}
// Indexed loads and stores.
-let Predicates = [HasMips32r2Or64, IsNotNaCl/*@LOCALMOD*/] in {
+let Predicates = [HasFPIdx, IsNotNaCl/*@LOCALMOD*/] in {
def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>;
def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3142ac94b1..874eb136c0 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -148,14 +148,14 @@ def HasSwap : Predicate<"Subtarget.hasSwap()">,
AssemblerPredicate<"FeatureSwap">;
def HasCondMov : Predicate<"Subtarget.hasCondMov()">,
AssemblerPredicate<"FeatureCondMov">;
+def HasFPIdx : Predicate<"Subtarget.hasFPIdx()">,
+ AssemblerPredicate<"FeatureFPIdx">;
def HasMips32 : Predicate<"Subtarget.hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
def HasMips64 : Predicate<"Subtarget.hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
-def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">,
- AssemblerPredicate<"FeatureMips32r2,FeatureMips64">;
def NotMips64 : Predicate<"!Subtarget.hasMips64()">,
AssemblerPredicate<"!FeatureMips64">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
@@ -1187,12 +1187,14 @@ def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsHi texternalsym:$in), (LUi texternalsym:$in)>;
def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+def : MipsPat<(MipsLo texternalsym:$in), (ADDiu ZERO, texternalsym:$in)>;
def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 5d9f0cffb7..6cd887b0eb 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -258,7 +258,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BalTgtMBB->addSuccessor(TgtMBB);
int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address;
- int64_t Offset = TgtAddress - (I.Address + I.Size - 20);
+ unsigned BalTgtMBBSize = 5;
+ int64_t Offset = TgtAddress - (I.Address + I.Size - BalTgtMBBSize * 4);
int64_t Lo = SignExtend64<16>(Offset & 0xffff);
int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
@@ -351,6 +352,9 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
.addReg(Mips::SP_64).addImm(16)->setIsInsideBundle();
}
+
+ assert(BalTgtMBBSize == BalTgtMBB->size());
+ assert(LongBrMBB->size() + BalTgtMBBSize == LongBranchSeqSize);
} else {
// $longbr:
// j $tgt
@@ -361,6 +365,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
LongBrMBB->addSuccessor(TgtMBB);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
+
+ assert(LongBrMBB->size() == LongBranchSeqSize);
}
if (I.Br->isUnconditionalBranch()) {
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index d8119ff75c..f783af0e9a 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -62,6 +62,10 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break;
case MipsII::MO_HIGHER: Kind = MCSymbolRefExpr::VK_Mips_HIGHER; break;
case MipsII::MO_HIGHEST: Kind = MCSymbolRefExpr::VK_Mips_HIGHEST; break;
+ case MipsII::MO_GOT_HI16: Kind = MCSymbolRefExpr::VK_Mips_GOT_HI16; break;
+ case MipsII::MO_GOT_LO16: Kind = MCSymbolRefExpr::VK_Mips_GOT_LO16; break;
+ case MipsII::MO_CALL_HI16: Kind = MCSymbolRefExpr::VK_Mips_CALL_HI16; break;
+ case MipsII::MO_CALL_LO16: Kind = MCSymbolRefExpr::VK_Mips_CALL_LO16; break;
}
switch (MOTy) {
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 1ff41ca358..f464d71fa6 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -31,8 +31,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
- HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false),
- HasDSP(false), HasDSPR2(false), IsAndroid(false)
+ HasMinMax(false), HasSwap(false), HasBitCount(false), HasFPIdx(false),
+ InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false)
// @LOCALMOD-START
, TargetTriple(TT)
// @LOCALMOD-END
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 6eeab5c351..0ea85bb3c1 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -89,6 +89,9 @@ protected:
// HasBitCount - Count leading '1' and '0' bits.
bool HasBitCount;
+ // HasFPIdx -- Floating point indexed load/store instructions.
+ bool HasFPIdx;
+
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
@@ -129,8 +132,6 @@ public:
bool hasMips64() const { return MipsArchVersion >= Mips64; }
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
- bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); }
-
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
@@ -154,6 +155,7 @@ public:
bool hasMinMax() const { return HasMinMax; }
bool hasSwap() const { return HasSwap; }
bool hasBitCount() const { return HasBitCount; }
+ bool hasFPIdx() const { return HasFPIdx; }
// @LOCALMOD-BEGIN
bool isTargetNaCl() const {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3dd9bf5613..31ab68158c 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
cl::location(llvm::InterleaveSrcInPtx));
+namespace {
+/// DiscoverDependentGlobals - Add to \p Globals every GlobalVariable on which
+/// \p V depends, either directly or through its operands.
+void DiscoverDependentGlobals(Value *V,
+ DenseSet<GlobalVariable*> &Globals) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ Globals.insert(GV);
+ else {
+ if (User *U = dyn_cast<User>(V)) {
+ for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
+ DiscoverDependentGlobals(U->getOperand(i), Globals);
+ }
+ }
+ }
+}
+/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
+/// instances to be emitted, but only after the globals it depends on have
+/// been added.
+void VisitGlobalVariableForEmission(GlobalVariable *GV,
+ SmallVectorImpl<GlobalVariable*> &Order,
+ DenseSet<GlobalVariable*> &Visited,
+ DenseSet<GlobalVariable*> &Visiting) {
+ // Have we already visited this one?
+ if (Visited.count(GV)) return;
+
+ // Do we have a circular dependency?
+ if (Visiting.count(GV))
+ report_fatal_error("Circular dependency found in global variable set");
+
+ // Start visiting this global
+ Visiting.insert(GV);
+
+ // Make sure we visit all dependencies first
+ DenseSet<GlobalVariable*> Others;
+ for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
+ DiscoverDependentGlobals(GV->getOperand(i), Others);
+
+ for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
+ E = Others.end(); I != E; ++I)
+ VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
+
+ // Now we can visit this global itself
+ Order.push_back(GV);
+ Visited.insert(GV);
+ Visiting.erase(GV);
+}
+}
// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
// cannot just link to the existing version.
@@ -631,7 +678,7 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
O << ".global";
break;
default:
- assert("wrong value");
+ llvm_unreachable("Wrong Address Space");
}
}
else if (!strcmp(Modifier, "sign")) {
@@ -649,10 +696,10 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
O << ".v4";
}
else
- assert("unknown modifier");
+ llvm_unreachable("Unknown Modifier");
}
else
- assert("unknown modifier");
+ llvm_unreachable("Empty Modifier");
}
void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
@@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
emitDeclarations(M, OS2);
- // Print out module-level global variables here.
+ // As ptxas does not support forward references of globals, we need to first
+ // sort the list of module-level globals in def-use order. We visit each
+ // global variable in order, and ensure that we emit it *after* the globals
+ // it depends on. We use a little extra memory maintaining both a set and a
+ // list to have fast searches while maintaining a strict ordering.
+ SmallVector<GlobalVariable*,8> Globals;
+ DenseSet<GlobalVariable*> GVVisited;
+ DenseSet<GlobalVariable*> GVVisiting;
+
+ // Visit each global variable, in order
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- printModuleLevelGV(I, OS2);
+ I != E; ++I)
+ VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
+
+ assert(GVVisited.size() == M.getGlobalList().size() &&
+ "Missed a global variable");
+ assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
+
+ // Print out module-level global variables in proper order
+ for (unsigned i = 0, e = Globals.size(); i != e; ++i)
+ printModuleLevelGV(Globals[i], OS2);
OS2 << '\n';
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index be771e3567..f1a99d77be 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
- setOperationAction(ISD::LOAD, MVT::i1, Expand);
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setOperationAction(ISD::STORE, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i32, MVT::i1, Expand);
setTruncStoreAction(MVT::i16, MVT::i1, Expand);
@@ -856,11 +857,64 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_SUBVECTOR:
return Op;
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+
+// v = ld i1* addr
+// =>
+// v1 = ld i8* addr
+// v = trunc v1 to i1
+SDValue NVPTXTargetLowering::
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ DebugLoc dl = Node->getDebugLoc();
+ assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
+ assert(Node->getValueType(0) == MVT::i1 &&
+ "Custom lowering for i1 load only");
+ SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
+ LD->getAlignment());
+ SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
+ // The legalizer (the caller) is expecting two values from the legalized
+ // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
+ // in LegalizeDAG.cpp which also uses MergeValues.
+ SDValue Ops[] = {result, LD->getChain()};
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+// st i1 v, addr
+// =>
+// v1 = zext v to i8
+// st i8 v1, addr
+SDValue NVPTXTargetLowering::
+LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Tmp1 = ST->getChain();
+ SDValue Tmp2 = ST->getBasePtr();
+ SDValue Tmp3 = ST->getValue();
+ assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i8, Tmp3);
+ SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ return Result;
+}
+
+
SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
EVT v) const {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 86246e6449..94a177ceb0 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -138,6 +138,9 @@ private:
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index c3a683a2c6..3cfd9718e5 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -28,7 +28,6 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
std::string TargetName;
NVPTX::DrvInterface drvInterface;
- bool dummy; // For the 'dummy' feature, see NVPTX.td
bool Is64Bit;
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 87ecb13a4c..187c6b35af 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -61,7 +61,9 @@ public:
void RecordRelocation(MachObjectWriter *Writer,
const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {}
+ MCValue Target, uint64_t &FixedValue) {
+ llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+ }
};
class PPCAsmBackend : public MCAsmBackend {
@@ -92,6 +94,20 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != 4; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ }
+
bool mayNeedRelaxation(const MCInst &Inst) const {
// FIXME.
return false;
@@ -135,11 +151,6 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- llvm_unreachable("UNIMP");
- }
-
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
return createMachObjectWriter(new PPCMachObjectWriter(
@@ -161,19 +172,6 @@ namespace {
ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
PPCAsmBackend(T), OSABI(OSABI) { }
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const {
- Value = adjustFixupValue(Fixup.getKind(), Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
-
- // For each byte of the fragment that the fixup touches, mask in the bits from
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
- }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 1518a60db8..dc93f7124a 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -74,10 +74,26 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_
break;
case PPC::fixup_ppc_ha16:
- Type = ELF::R_PPC_ADDR16_HA;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
+ Type = ELF::R_PPC_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ }
break;
case PPC::fixup_ppc_lo16:
- Type = ELF::R_PPC_ADDR16_LO;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
+ Type = ELF::R_PPC_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ }
break;
case PPC::fixup_ppc_lo14:
Type = ELF::R_PPC_ADDR14;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 4387730fcc..3900c8bab4 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -54,12 +55,13 @@
#include "llvm/Support/ELF.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/MapVector.h"
using namespace llvm;
namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
- DenseMap<MCSymbol*, MCSymbol*> TOC;
+ MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget &Subtarget;
uint64_t TOCLabelID;
public:
@@ -348,14 +350,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *PICBase = MF->getPICBaseSymbol();
// Emit the 'bl'.
- TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
-
-
- // FIXME: We would like an efficient form for this, so we don't have to do
- // a lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
- Create(PICBase, OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here.
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
@@ -404,9 +402,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
+ .addReg(MI->getOperand(0).getReg()));
return;
case PPC::SYNC:
// In Book E sync is called msync, handle this special case here...
@@ -465,8 +462,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
SectionKind::getReadOnly());
OutStreamer.SwitchSection(Section);
- // FIXME: This is nondeterminstic!
- for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
@@ -549,16 +545,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
- TmpStr += "$lazy_ptr";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
}
static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
- TmpStr += "$tmp";
- return Ctx.GetOrCreateSymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
}
void PPCDarwinAsmPrinter::
@@ -589,32 +582,50 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ // mflr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
// FIXME: MCize this.
- OutStreamer.EmitRawText(StringRef("\tmflr r0"));
- OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName()));
+ OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName()));
OutStreamer.EmitLabel(AnonSymbol);
- OutStreamer.EmitRawText(StringRef("\tmflr r11"));
- OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+
- "-" + AnonSymbol->getName() + ")");
- OutStreamer.EmitRawText(StringRef("\tmtlr r0"));
-
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- "-" + AnonSymbol->getName() + ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
-
+ // mflr r11
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
+ MCSymbolRefExpr::Create(AnonSymbol, OutContext),
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(Sub));
+ // mtlr r0
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(Sub).addExpr(Sub)
+ .addReg(PPC::R11));
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
return;
@@ -634,23 +645,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
EmitAlignment(4);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
- OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")");
- if (isPPC64)
- OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- else
- OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) +
- ")(r11)");
- OutStreamer.EmitRawText(StringRef("\tmtctr r12"));
- OutStreamer.EmitRawText(StringRef("\tbctr"));
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
+ OutContext);
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ const MCExpr *LazyPtrLo16 =
+ MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
+ OutContext);
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR));
+
OutStreamer.SwitchSection(LSPSection);
OutStreamer.EmitLabel(LazyPtr);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
-
- if (isPPC64)
- OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper"));
- else
- OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper"));
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
}
OutStreamer.AddBlankLine();
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index adf78d5233..7d97450676 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -347,6 +347,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
@@ -373,12 +388,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
- for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- setOperationAction(ISD::FSQRT, VT, Expand);
- }
-
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -393,6 +402,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 6c2249a11b..9711452ec4 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -234,10 +234,10 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst", BrB, []>;
- def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst", BrB, []>;
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
@@ -511,7 +511,7 @@ def RLWINM8 : MForm_2<21,
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>;
-def ISEL8 : AForm_1<31, 15,
+def ISEL8 : AForm_4<31, 15,
(outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
@@ -556,7 +556,7 @@ def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -606,7 +606,7 @@ def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -706,7 +706,7 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
let PPC970_Unit = 2 in {
-def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
[(set ptr_rc:$ea_res,
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index ba58c3e4ac..87758e90fb 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -721,3 +721,13 @@ def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
(VCFSX_0 VRRC:$vA)>;
def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
(VCFUX_0 VRRC:$vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))),
+ (VRFIM VRRC:$vA)>;
+def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))),
+ (VRFIP VRRC:$vA)>;
+def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
+ (VRFIZ VRRC:$vA)>;
+def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
+ (VRFIN VRRC:$vA)>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index a41a0279d2..c3c171cd21 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -94,12 +94,6 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
let Inst{31} = lk;
}
-class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
- string asmstr, InstrItinClass itin, list<dag> pattern>
- : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> {
- let LI{0-4} = bo;
-}
-
// 1.7.2 B-Form
class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
: I<opcode, OOL, IOL, asmstr, BrB> {
@@ -118,6 +112,13 @@ class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
let Inst{31} = lk;
}
+class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr>
+ : BForm<opcode, aa, lk, OOL, IOL, asmstr> {
+ let BIBO{4-0} = bo;
+ let BIBO{6-5} = 0;
+ let CR = 0;
+}
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -625,9 +626,9 @@ class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FXM;
- bits<5> ST;
+ bits<5> rS;
- let Inst{6-10} = ST;
+ let Inst{6-10} = rS;
let Inst{11} = 0;
let Inst{12-19} = FXM;
let Inst{20} = 0;
@@ -666,7 +667,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
string cstr, InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
- bits<5> RT;
+ bits<5> rT;
bit RC = 0; // set by isDOT
let Pattern = pattern;
@@ -675,7 +676,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{6} = 0;
let Inst{7-14} = FM;
let Inst{15} = 0;
- let Inst{16-20} = RT;
+ let Inst{16-20} = rT;
let Inst{21-30} = xo;
let Inst{31} = RC;
}
@@ -758,6 +759,26 @@ class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let FRB = 0;
}
+class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12).
+ bits<3> CR;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-23} = CR;
+ let Inst{24-25} = BIBO{6-5};
+ let Inst{26-30} = xo;
+ let Inst{31} = 0;
+}
+
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 3ef3bab957..6ee045a2c7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -446,10 +446,10 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
let Defs = [CTR], Uses = [CTR] in {
- def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst", BrB, []>;
- def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst", BrB, []>;
+ def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
}
}
@@ -732,7 +732,7 @@ def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -1395,13 +1395,13 @@ let Uses = [RM] in {
"fdivs $FRT, $FRA, $FRB", FPDivS,
[(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
def FMUL : AForm_3<63, 25,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fmul $FRT, $FRA, $FRB", FPFused,
- [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
+ "fmul $FRT, $FRA, $FRC", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>;
def FMULS : AForm_3<59, 25,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
- "fmuls $FRT, $FRA, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
+ "fmuls $FRT, $FRA, $FRC", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>;
def FSUB : AForm_2<63, 20,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
"fsub $FRT, $FRA, $FRB", FPAddSub,
@@ -1414,7 +1414,7 @@ let Uses = [RM] in {
}
let PPC970_Unit = 1 in { // FXU Operations.
- def ISEL : AForm_1<31, 15,
+ def ISEL : AForm_4<31, 15,
(outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3..1f00b3b2b0 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -498,7 +498,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
} else if (CRSpillFrameIdx) {
FrameIdx = CRSpillFrameIdx;
} else {
- MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
+ MachineFrameInfo *MFI =
+ (const_cast<MachineFunction &>(MF)).getFrameInfo();
FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
CRSpillFrameIdx = FrameIdx;
}
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 6d4eab1204..3a9ace4c52 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -39,6 +39,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__cxa_guard_acquire",
"__cxa_guard_release",
"__memcpy_chk",
+ "abs",
"acos",
"acosf",
"acosh",
@@ -91,6 +92,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"fabs",
"fabsf",
"fabsl",
+ "ffs",
+ "ffsl",
+ "ffsll",
"fiprintf",
"floor",
"floorf",
@@ -98,11 +102,16 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"fmod",
"fmodf",
"fmodl",
+ "fprintf",
"fputc",
"fputs",
"free",
"fwrite",
"iprintf",
+ "isascii",
+ "isdigit",
+ "labs",
+ "llabs",
"log",
"log10",
"log10f",
@@ -132,6 +141,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"pow",
"powf",
"powl",
+ "printf",
"putchar",
"puts",
"realloc",
@@ -149,6 +159,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"sinhl",
"sinl",
"siprintf",
+ "sprintf",
"sqrt",
"sqrtf",
"sqrtl",
@@ -182,6 +193,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"tanhf",
"tanhl",
"tanl",
+ "toascii",
"trunc",
"truncf",
"truncl",
@@ -327,6 +339,41 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
// Win32 does *not* provide stpcpy. It is provided on POSIX systems:
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
TLI.setUnavailable(LibFunc::stpcpy);
+
+ // Win32 does *not* provide ffs. It is provided on POSIX systems:
+ // http://pubs.opengroup.org/onlinepubs/009695399/functions/ffs.html
+ TLI.setUnavailable(LibFunc::ffs);
+
+ // Win32 does *not* provide llabs. It is defined in ISO/IEC 9899:1999,
+ // but Visual C++ does not support it.
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
}
}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 9d7e2b825f..62ce86e292 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -285,35 +285,35 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
return DataSection;
}
-/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
+/// getTTypeGlobalReference - Return an MCExpr to use for a
/// reference to the specified global variable from exception
/// handling information.
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
- const MCSymbol *Sym = Mang->getSymbol(GV);
- return getExprForDwarfReference(Sym, Encoding, Streamer);
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
+ const MCSymbolRefExpr *Ref =
+ MCSymbolRefExpr::Create(Mang->getSymbol(GV), getContext());
+
+ return getTTypeReference(Ref, Encoding, Streamer);
}
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const {
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
-
+getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const {
switch (Encoding & 0x70) {
default:
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
// Do nothing special
- return Res;
+ return Sym;
case dwarf::DW_EH_PE_pcrel: {
// Emit a label to the streamer for the current position. This gives us
// .-foo addressing.
MCSymbol *PCSym = getContext().CreateTempSymbol();
Streamer.EmitLabel(PCSym);
const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ return MCBinaryExpr::CreateSub(Sym, PC, getContext());
}
}
}
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 59ffdea00e..af0cef62d5 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -22,6 +22,10 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
+bool TargetSubtargetInfo::enableMachineScheduler() const {
+ return false;
+}
+
bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ce446e7573..79f7c00960 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1748,6 +1748,7 @@ processInstruction(MCInst &Inst,
}
}
+static const char *getSubtargetFeatureName(unsigned Val);
bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -1809,10 +1810,21 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Out.EmitInstruction(Inst);
Opcode = Inst.getOpcode();
return false;
- case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, MatchingInlineAsm);
- return true;
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special case the error message for the very common case where only
+ // a single subtarget feature is missing.
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
+ }
case Match_InvalidOperand:
WasOriginallyInvalidOperand = true;
break;
@@ -1843,19 +1855,32 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
+ unsigned ErrorInfoMissingFeature;
unsigned Match1, Match2, Match3, Match4;
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match1 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match2 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[2];
Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match3 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
Tmp[Base.size()] = Suffixes[3];
Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
isParsingIntelSyntax());
+ // If this returned as a missing feature failure, remember that.
+ if (Match4 == Match_MissingFeature)
+ ErrorInfoMissingFeature = ErrorInfoIgnore;
// Restore the old token.
Op->setTokenValue(Base);
@@ -1936,9 +1961,16 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// missing feature.
if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
(Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
- Error(IDLoc, "instruction requires a CPU feature not currently enabled",
- EmptyRanges, MatchingInlineAsm);
- return true;
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) {
+ if (ErrorInfoMissingFeature & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm);
}
// If one instruction matched with an invalid operand, report this as an
@@ -2039,4 +2071,5 @@ extern "C" void LLVMInitializeX86AsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
+#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index cbdfeaedbe..b8b0a5a05b 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -68,12 +68,6 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
-/// which determines whether the frame pointer register should be
-/// reserved in case dynamic stack alignment is later required.
-///
-FunctionPass *createX86MaxStackAlignmentHeuristicPass();
-
} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 6786756c7f..7ad2fdd259 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -103,6 +103,15 @@ def RetCC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<RetCC_X86Common>
]>;
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_HiPE : CallingConv<[
+ // Promote all types to i32
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -123,17 +132,30 @@ def RetCC_X86_Win64_C : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
+// X86-64 HiPE return-value convention.
+def RetCC_X86_64_HiPE : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
+]>;
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // If HiPE, use RetCC_X86_32_HiPE.
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
]>;
// This is the root return-value convention for the X86-64 backend.
def RetCC_X86_64 : CallingConv<[
+ // HiPE uses RetCC_X86_64_HiPE
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -254,29 +276,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
-// X86-64 Intel OpenCL built-ins calling convention.
-def CC_Intel_OCL_BI : CallingConv<[
- CCIfType<[i32], CCIfSubtarget<"isTargetWin32()", CCAssignToStack<4, 4>>>,
-
- CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
- CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
-
- CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX]>>,
- CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX]>>,
-
- // The SSE vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
-
- // The 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v8f32, v4f64, v8i32, v4i64],
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
-
- CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
- CCDelegateTo<CC_X86_64_C>
-]>;
-
-
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
@@ -291,6 +290,18 @@ def CC_X86_64_GHC : CallingConv<[
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
+def CC_X86_64_HiPE : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -422,6 +433,42 @@ def CC_X86_32_GHC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
+def CC_X86_32_HiPE : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>
+]>;
+
+// X86 Intel OpenCL built-ins calling convention (32-bit and 64-bit targets).
+def CC_Intel_OCL_BI : CallingConv<[
+
+ CCIfType<[i32], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[ECX, EDX, R8D, R9D]>>>,
+ CCIfType<[i64], CCIfSubtarget<"isTargetWin64()", CCAssignToReg<[RCX, RDX, R8, R9 ]>>>,
+
+ CCIfType<[i32], CCIfSubtarget<"is64Bit()", CCAssignToReg<[EDI, ESI, EDX, ECX]>>>,
+ CCIfType<[i64], CCIfSubtarget<"is64Bit()", CCAssignToReg<[RDI, RSI, RDX, RCX]>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // The SSE vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v8f32, v4f64, v8i32, v4i64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
+
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64_C>>,
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -432,6 +479,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -440,6 +488,7 @@ def CC_X86_32 : CallingConv<[
// This is the root argument convention for the X86-64 backend.
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ad652366ad..8a39fd5142 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -297,7 +297,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
case MVT::i32: Opc = X86::MOV32mi; break;
case MVT::i64:
// Must be a 32-bit sign extended value.
- if ((int)CI->getSExtValue() == CI->getSExtValue())
+ if (isInt<32>(CI->getSExtValue()))
Opc = X86::MOV64mi32;
break;
}
@@ -2197,13 +2197,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
MVT VT;
if (!isTypeLegal(CF->getType(), VT))
- return false;
+ return 0;
// Get opcode and regclass for the given zero.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::f32:
if (X86ScalarSSEf32) {
Opc = X86::FsFLD0SS;
@@ -2224,7 +2224,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f80:
// No f80 support yet.
- return false;
+ return 0;
}
unsigned ResultReg = createResultReg(RC);
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 791f5982af..50d0fb5c98 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -111,7 +111,7 @@ namespace {
EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
- unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
unsigned Mask = 0;
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I) {
@@ -198,7 +198,7 @@ namespace {
}
/// getScratchReg - Return an FP register that is not currently in use.
- unsigned getScratchReg() {
+ unsigned getScratchReg() const {
for (int i = NumFPRegs - 1; i >= 8; --i)
if (!isLive(i))
return i;
@@ -206,7 +206,7 @@ namespace {
}
/// isScratchReg - Returns true if RegNo is a scratch FP register.
- bool isScratchReg(unsigned RegNo) {
+ static bool isScratchReg(unsigned RegNo) {
return RegNo > 8 && RegNo < NumFPRegs;
}
@@ -311,7 +311,7 @@ namespace {
void handleSpecialFP(MachineBasicBlock::iterator &I);
// Check if a COPY instruction is using FP registers.
- bool isFPCopy(MachineInstr *MI) {
+ static bool isFPCopy(MachineInstr *MI) {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SrcReg = MI->getOperand(1).getReg();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 42134256e3..d9d354851d 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -22,8 +22,6 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,7 +29,6 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5610bb5ba3..bf51a6b6ba 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -737,74 +737,79 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// First set operation action for all vector types to either promote
// (for widening) or expand (for scalarization). Then we will selectively
// turn on ones that can be effectively codegen'd.
- for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
- VT <= MVT::LAST_VECTOR_VALUETYPE; ++VT) {
- setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FFLOOR, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SETCC, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
- setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ setOperationAction(ISD::ADD , VT, Expand);
+ setOperationAction(ISD::SUB , VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::LOAD, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::SHL, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
+ setOperationAction(ISD::TRUNCATE, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
+ setTruncStoreAction(VT,
(MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -985,7 +990,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1065,6 +1078,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
@@ -1074,6 +1091,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
@@ -1115,12 +1136,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
- setOperationAction(ISD::FMA, MVT::v8f32, Custom);
- setOperationAction(ISD::FMA, MVT::v4f64, Custom);
- setOperationAction(ISD::FMA, MVT::v4f32, Custom);
- setOperationAction(ISD::FMA, MVT::v2f64, Custom);
- setOperationAction(ISD::FMA, MVT::f32, Custom);
- setOperationAction(ISD::FMA, MVT::f64, Custom);
+ setOperationAction(ISD::FMA, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
}
if (Subtarget->hasAVX2()) {
@@ -1381,18 +1402,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
- // linux. This is because the stack realignment code can't handle certain
- // cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
!F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16))) &&
- Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->getStackAlignment() >= 32) {
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
+ if (Size >= 32) {
if (Subtarget->hasAVX2())
return MVT::v8i32;
if (Subtarget->hasAVX())
@@ -1404,7 +1421,6 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
- Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
@@ -1835,7 +1851,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -1922,7 +1939,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2269,7 +2286,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -3135,6 +3152,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
return TailCallOpt;
case CallingConv::GHC:
return TailCallOpt;
+ case CallingConv::HiPE:
+ return TailCallOpt;
}
}
@@ -6702,7 +6721,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splats by matching through known shuffle masks
if ((Size == 128 && NumElem <= 4) ||
- (Size == 256 && NumElem < 8))
+ (Size == 256 && NumElem <= 8))
return SDValue();
// All remaining splats are promoted to target supported vector shuffles.
@@ -12924,8 +12943,8 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
case X86::ATOMSUB6432: {
unsigned HiOpc;
unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg);
break;
}
case X86::ATOMNAND6432: {
@@ -14439,6 +14458,18 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
Ld->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
false/*WriteMem*/);
+
+ // Make sure the newly-created LOAD is in the same position as Ld in
+ // terms of dependency. We create a TokenFactor for Ld and ResNode,
+ // and update uses of Ld's output chain to use the TokenFactor.
+ if (Ld->hasAnyUseOfValue(1)) {
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
+ DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
+ SDValue(ResNode.getNode(), 1));
+ }
+
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
}
}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 959d91a9ab..d360a73b34 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -307,8 +307,6 @@ let isCodeGenOnly = 1 in {
} // isCodeGenOnly = 1
}
-let Predicates = [HasFMA4] in {
-
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
int_x86_fma_vfmadd_ss>;
@@ -338,29 +336,33 @@ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
int_x86_fma_vfnmsub_sd>;
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
-defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
-} // HasFMA4
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
+ memopv4f32, memopv8f32>;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
+ memopv2f64, memopv4f64>;
+}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 7309942880..70a968c4f5 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -571,7 +571,7 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
- : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ : I<o, F, outs, ins, asm, pattern, itin>, TA,
OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index cec4625135..921ec0d84b 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -617,9 +617,9 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
- AssemblerPredicate<"!Mode64Bit">;
+ AssemblerPredicate<"!Mode64Bit", "32-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
- AssemblerPredicate<"Mode64Bit">;
+ AssemblerPredicate<"Mode64Bit", "64-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6f48d7ed7f..229e8b263f 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6378,12 +6378,47 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (VROUNDPSr VR128:$src, (i32 0x3))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (VROUNDPDr VR128:$src, (i32 0x3))>;
+
def : Pat<(v8f32 (ffloor VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x1))>;
+ def : Pat<(v8f32 (fnearbyint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0xC))>;
+ def : Pat<(v8f32 (fceil VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x2))>;
+ def : Pat<(v8f32 (frint VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x4))>;
+ def : Pat<(v8f32 (ftrunc VR256:$src)),
+ (VROUNDYPSr VR256:$src, (i32 0x3))>;
+
def : Pat<(v4f64 (ffloor VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x1))>;
+ def : Pat<(v4f64 (fnearbyint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0xC))>;
+ def : Pat<(v4f64 (fceil VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x2))>;
+ def : Pat<(v4f64 (frint VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x4))>;
+ def : Pat<(v4f64 (ftrunc VR256:$src)),
+ (VROUNDYPDr VR256:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6417,8 +6452,25 @@ let Predicates = [UseSSE41] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x1))>;
+ def : Pat<(v4f32 (fnearbyint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0xC))>;
+ def : Pat<(v4f32 (fceil VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x2))>;
+ def : Pat<(v4f32 (frint VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x4))>;
+ def : Pat<(v4f32 (ftrunc VR128:$src)),
+ (ROUNDPSr VR128:$src, (i32 0x3))>;
+
def : Pat<(v2f64 (ffloor VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x1))>;
+ def : Pat<(v2f64 (fnearbyint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0xC))>;
+ def : Pat<(v2f64 (fceil VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x2))>;
+ def : Pat<(v2f64 (frint VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x4))>;
+ def : Pat<(v2f64 (ftrunc VR128:$src)),
+ (ROUNDPDr VR128:$src, (i32 0x3))>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 20bc85e65f..083d75666f 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -549,18 +550,14 @@ ReSimplify:
OutMI.setOpcode(X86::RET);
break;
- case X86::MORESTACK_RET_RESTORE_R10: {
- MCInst retInst;
-
+ case X86::MORESTACK_RET_RESTORE_R10:
OutMI.setOpcode(X86::MOV64rr);
OutMI.addOperand(MCOperand::CreateReg(X86::R10));
OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
- retInst.setOpcode(X86::RET);
- AsmPrinter.OutStreamer.EmitInstruction(retInst);
+ AsmPrinter.OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
break;
}
- }
}
static void LowerTlsAddr(MCStreamer &OutStreamer,
@@ -574,11 +571,8 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCContext &context = OutStreamer.getContext();
- if (needsPadding) {
- MCInst prefix;
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- }
+ if (needsPadding)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
@@ -628,20 +622,11 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
OutStreamer.EmitInstruction(LEA);
if (needsPadding) {
- MCInst prefix;
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- prefix.setOpcode(X86::DATA16_PREFIX);
- OutStreamer.EmitInstruction(prefix);
- prefix.setOpcode(X86::REX64_PREFIX);
- OutStreamer.EmitInstruction(prefix);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX));
}
- MCInst call;
- if (is64Bits)
- call.setOpcode(X86::CALL64pcrel32);
- else
- call.setOpcode(X86::CALLpcrel32);
StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
const MCSymbolRefExpr *tlsRef =
@@ -649,8 +634,9 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
MCSymbolRefExpr::VK_PLT,
context);
- call.addOperand(MCOperand::CreateExpr(tlsRef));
- OutStreamer.EmitInstruction(call);
+ OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32
+ : X86::CALLpcrel32)
+ .addExpr(tlsRef));
}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -694,7 +680,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
case X86::MOVPC32r: {
- MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
// looks like:
// call "L1$pb"
@@ -703,26 +688,22 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Emit the call.
MCSymbol *PICBase = MF->getPICBaseSymbol();
- // @LOCALMOD-BEGIN
- // For NaCl, the call should be aligned to the end of a bundle. Since the
- // call is at the end of the bundle, there should be no padding between
- // the call and the next instruction (the label should still make sense).
- TmpInst.setOpcode(getSubtarget().isTargetNaCl() ?
- X86::NACL_CALL32d : X86::CALLpcrel32);
- // @LOCALMOD-END
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase,
- OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
+ // LOCALMOD: For NaCl, the call should be aligned to the end of a bundle. Since the
+ // call is at the end of the bundle, there should be no padding between
+ // the call and the next instruction (the label should still make sense).
+
+ OutStreamer.EmitInstruction(MCInstBuilder(
+ getSubtarget().isTargetNaCl() ? X86::NACL_CALL32d : X86::CALLpcrel32) // @LOCALMOD
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
// Emit the label.
OutStreamer.EmitLabel(PICBase);
// popl $reg
- TmpInst.setOpcode(X86::POP32r);
- TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg());
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::POP32r)
+ .addReg(MI->getOperand(0).getReg()));
return;
}
@@ -752,12 +733,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
DotExpr, OutContext);
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- TmpInst.addOperand(MCOperand::CreateExpr(DotExpr));
- OutStreamer.EmitInstruction(TmpInst);
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::ADD32ri)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(DotExpr));
return;
}
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 9054345d35..f95071792a 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -195,6 +195,11 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &X86::GR64_TCW64RegClass;
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_TCRegClass;
+
+ const Function *F = MF.getFunction();
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
return &X86::GR32_TCRegClass;
}
}
@@ -235,6 +240,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
bool oclBiCall = false;
+ bool hipeCall = false;
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (MF) {
@@ -242,9 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const Function *F = MF->getFunction();
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
+ hipeCall = (F ? F->getCallingConv() == CallingConv::HiPE : false);
}
- if (ghcCall)
+ if (ghcCall || hipeCall)
return CSR_NoRegs_SaveList;
if (oclBiCall) {
if (HasAVX && IsWin64)
@@ -278,7 +285,7 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (!HasAVX && !IsWin64 && Is64Bit)
return CSR_64_Intel_OCL_BI_RegMask;
}
- if (CC == CallingConv::GHC)
+ if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
@@ -812,46 +819,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
}
}
-
-namespace {
- struct MSAH : public MachineFunctionPass {
- static char ID;
- MSAH() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF.getTarget());
- const TargetFrameLowering *TFI = TM->getFrameLowering();
- MachineRegisterInfo &RI = MF.getRegInfo();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = TFI->getStackAlignment();
-
- // Be over-conservative: scan over all vreg defs and find whether vector
- // registers are used. If yes, there is a possibility that vector register
- // will be spilled and thus require dynamic stack realignment.
- for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setForceFramePointer(true);
- return true;
- }
- }
- // Nothing to do
- return false;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Check";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAH::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 59c037f296..5135946c97 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -176,7 +176,6 @@ bool X86PassConfig::addInstSelector() {
}
bool X86PassConfig::addPreRegAlloc() {
- addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 4f39d68d40..1cfaeda0eb 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -23,9 +23,9 @@ using namespace llvm;
using namespace dwarf;
const MCExpr *X86_64MachoTargetObjectFile::
-getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const {
+getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const {
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
@@ -38,7 +38,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
}
return TargetLoweringObjectFileMachO::
- getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+ getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
MCSymbol *X86_64MachoTargetObjectFile::
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 5fac48e57a..2a382e25af 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -21,9 +21,9 @@ namespace llvm {
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
virtual const MCExpr *
- getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
- MCStreamer &Streamer) const;
+ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI, unsigned Encoding,
+ MCStreamer &Streamer) const;
// getCFIPersonalitySymbol - The symbol that gets passed to
// .cfi_personality.
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9e7816e21f..f1098f9dc3 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -225,20 +225,16 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
{
// FIXME there is no actual debug info here
DebugLoc dl = GA.getDebugLoc();
- if (isa<Function>(GV)) {
- return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ const GlobalValue *UnderlyingGV = GV;
+ // If GV is an alias then use the aliasee to determine the wrapper type
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ UnderlyingGV = GA->resolveAliasedGlobal();
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
+ if (GVar->isConstant())
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee to determine constness
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
- }
- bool isConst = GVar && GVar->isConstant();
- if (isConst) {
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
- }
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
}
SDValue XCoreTargetLowering::
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 8a0274b5ff..be48b2063f 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -611,7 +611,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttrListPtr::get(AttributesVec));
+ NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec));
AttributesVec.clear();
F->getParent()->getFunctionList().insert(F, NF);
@@ -731,11 +731,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+ cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(),
+ AttributesVec));
} else {
New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec));
+ cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(),
+ AttributesVec));
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index fc22548db7..4cfd0b235a 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -280,7 +280,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (FnAttrs.hasAttributes())
AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
FnAttrs));
- PAL = AttrListPtr::get(AttributesVec);
+ PAL = AttrListPtr::get(Fn.getContext(), AttributesVec);
}
Instruction *New;
@@ -806,7 +806,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewPAL = AttrListPtr::get(AttributesVec);
+ AttrListPtr NewPAL = AttrListPtr::get(F->getContext(), AttributesVec);
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -874,7 +874,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec);
+ AttrListPtr NewCallPAL = AttrListPtr::get(F->getContext(), AttributesVec);
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 678189b3d6..591278fa62 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -225,6 +225,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
// Don't hack on volatile stores.
if (SI->isVolatile()) return true;
+
GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
// If this is a direct store to the global (i.e., the global is a scalar
@@ -234,6 +235,14 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
SI->getOperand(1))) {
Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
if (StoredVal == GV->getInitializer()) {
if (GS.StoredType < GlobalStatus::isInitializerStored)
GS.StoredType = GlobalStatus::isInitializerStored;
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index b1c36c15db..6f4b810acc 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,6 +32,7 @@ namespace {
// AlwaysInliner only inlines functions that are mark as "always inline".
class AlwaysInliner : public Inliner {
+ InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
@@ -63,35 +64,6 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
return new AlwaysInliner(InsertLifetime);
}
-/// \brief Minimal filter to detect invalid constructs for inlining.
-static bool isInlineViable(Function &F) {
- bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- // Disallow inlining of functions which contain an indirect branch.
- if (isa<IndirectBrInst>(BI->getTerminator()))
- return false;
-
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
- ++II) {
- CallSite CS(II);
- if (!CS)
- continue;
-
- // Disallow recursive calls.
- if (&F == CS.getCalledFunction())
- return false;
-
- // Disallow calls which expose returns-twice to a function not previously
- // attributed as such.
- if (!ReturnsTwice && CS.isCall() &&
- cast<CallInst>(CS.getInstruction())->canReturnTwice())
- return false;
- }
- }
-
- return true;
-}
-
/// \brief Get the inline cost for the always-inliner.
///
/// The always inliner *only* handles functions which are marked with the
@@ -106,27 +78,21 @@ static bool isInlineViable(Function &F) {
/// likely not worth it in practice.
InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
Function *Callee = CS.getCalledFunction();
- // We assume indirect calls aren't calling an always-inline function.
- if (!Callee) return InlineCost::getNever();
-
- // We can't inline calls to external functions.
- // FIXME: We shouldn't even get here.
- if (Callee->isDeclaration()) return InlineCost::getNever();
-
- // Return never for anything not marked as always inline.
- if (!Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
- return InlineCost::getNever();
- // Do some minimal analysis to preclude non-viable functions.
- if (!isInlineViable(*Callee))
- return InlineCost::getNever();
+ // Only inline direct calls to functions with always-inline attributes
+ // that are viable for inlining. FIXME: We shouldn't even get here for
+ // declarations.
+ if (Callee && !Callee->isDeclaration() &&
+ Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline) &&
+ CA.isInlineViable(*Callee))
+ return InlineCost::getAlways();
- // Otherwise, force inlining.
- return InlineCost::getAlways();
+ return InlineCost::getNever();
}
// doInitialization - Initializes the vector of functions that have not
// been annotated with the "always inline" attribute.
bool AlwaysInliner::doInitialization(CallGraph &CG) {
+ CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
return false;
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 05253fcdda..48e20ec339 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -323,7 +323,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalDCEPass());
}
-LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void) {
+LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 7d0af0d802..12faedb0ff 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -269,7 +269,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
-/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
+/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat the V, Lo and HI as signed or not. IB is the location to
/// insert new instructions.
Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
@@ -2159,6 +2159,27 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
I.setOperand(1, NewRHS);
return &I;
}
+ } else if (Op0I->getOpcode() == Instruction::LShr) {
+ // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+ // E1 = "X ^ C1"
+ BinaryOperator *E1;
+ ConstantInt *C1;
+ if (Op0I->hasOneUse() &&
+ (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
+ E1->getOpcode() == Instruction::Xor &&
+ (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
+ // fold (C1 >> C2) ^ C3
+ ConstantInt *C2 = Op0CI, *C3 = RHS;
+ APInt FoldConst = C1->getValue().lshr(C2->getValue());
+ FoldConst ^= C3->getValue();
+ // Prepare the two operands.
+ Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2);
+ Opnd0->takeName(Op0I);
+ cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
+ Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
+
+ return BinaryOperator::CreateXor(Opnd0, FoldVal);
+ }
}
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4f4c388a92..aa9512cf03 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1179,7 +1179,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);
+ const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(),
+ attrVec);
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1357,7 +1358,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);
+ const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 8cb4a59cba..7c3f8fe15d 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2356,8 +2356,25 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Try not to increase register pressure.
BO0->hasOneUse() && BO1->hasOneUse()) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
- Value *Y = (A == C || A == D) ? B : A;
- Value *Z = (C == A || C == B) ? D : C;
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
+ }
return new ICmpInst(Pred, Y, Z);
}
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4ab5b6e4a0..fd684200fc 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -150,25 +150,6 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return 0;
}
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- (CE->getOpcode() == Instruction::GetElementPtr &&
- cast<GEPOperator>(CE)->hasAllZeroIndices()))
- return getPointeeAlignment(CE->getOperand(0), TD);
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (!GV->isDeclaration())
- return TD.getPreferredAlignment(GV);
-
- if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
- return TD.getABITypeAlignment(PT->getElementType());
-
- return 0;
-}
-
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
@@ -264,7 +245,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
- if (TD) {
+ if (AI.getAlignment()) {
// Check to see if this allocation is only modified by a memcpy/memmove from
// a constant global whose alignment is equal to or exceeds that of the
// allocation. If this is the case, we can change all users to use
@@ -273,7 +254,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// is only subsequently read.
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
- if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
+ AI.getAlignment(), TD);
+ if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9a46f25e66..feef2ccee4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -44,6 +44,7 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -65,6 +66,11 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable unsafe double to float "
+ "shrinking for math lib calls"));
+
// Initialization Routines
void llvm::initializeInstCombine(PassRegistry &Registry) {
initializeInstCombinerPass(Registry);
@@ -2374,7 +2380,7 @@ public:
InstCombinerLibCallSimplifier(const DataLayout *TD,
const TargetLibraryInfo *TLI,
InstCombiner *IC)
- : LibCallSimplifier(TD, TLI) {
+ : LibCallSimplifier(TD, TLI, UnsafeFPShrink) {
this->IC = IC;
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b7be4625ca..4e05c3200c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -69,6 +69,7 @@ static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
static const char *kAsanStackMallocName = "__asan_stack_malloc";
static const char *kAsanStackFreeName = "__asan_stack_free";
+static const char *kAsanGenPrefix = "__asan_gen_";
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
@@ -148,6 +149,41 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
cl::Hidden, cl::init(-1));
namespace {
+/// A set of dynamically initialized globals extracted from metadata.
+class SetOfDynamicallyInitializedGlobals {
+ public:
+ void Init(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+ DynInitGlobals.insert(cast<GlobalVariable>(VG));
+ }
+ }
+ bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; }
+ private:
+ SmallSet<GlobalValue*, 32> DynInitGlobals;
+};
+
+static int MappingScale() {
+ return ClMappingScale ? ClMappingScale : kDefaultShadowScale;
+}
+
+static size_t RedzoneSize() {
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+ return std::max(32U, 1U << MappingScale());
+}
+
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
AddressSanitizer();
@@ -171,7 +207,6 @@ struct AddressSanitizer : public FunctionPass {
bool poisonStackInFunction(Function &F);
virtual bool doInitialization(Module &M);
virtual bool doFinalization(Module &M);
- bool insertGlobalRedzones(Module &M);
static char ID; // Pass identification, replacement for typeid
private:
@@ -181,27 +216,23 @@ struct AddressSanitizer : public FunctionPass {
return SizeInBytes;
}
uint64_t getAlignedSize(uint64_t SizeInBytes) {
- return ((SizeInBytes + RedzoneSize - 1)
- / RedzoneSize) * RedzoneSize;
+ size_t RZ = RedzoneSize();
+ return ((SizeInBytes + RZ - 1) / RZ) * RZ;
}
uint64_t getAlignedAllocaSize(AllocaInst *AI) {
uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
return getAlignedSize(SizeInBytes);
}
- Function *checkInterfaceFunction(Constant *FuncOrBitcast);
bool ShouldInstrumentGlobal(GlobalVariable *G);
void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
void FindDynamicInitializers(Module &M);
- bool HasDynamicInitializer(GlobalVariable *G);
LLVMContext *C;
DataLayout *TD;
uint64_t MappingOffset;
- int MappingScale;
- size_t RedzoneSize;
int LongSize;
Type *IntptrTy;
Type *IntptrPtrTy;
@@ -209,13 +240,26 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanInitFunction;
Function *AsanStackMallocFunc, *AsanStackFreeFunc;
Function *AsanHandleNoReturnFunc;
- Instruction *CtorInsertBefore;
OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
InlineAsm *EmptyAsm;
- SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
- SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan;
+ SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+};
+
+// FIXME: inherit this from ModulePass and actually use it as a ModulePass.
+class AddressSanitizerCreateGlobalRedzonesPass {
+ public:
+ bool runOnModule(Module &M, DataLayout *TD);
+ private:
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
+ void createInitializerPoisonCalls(Module &M, Value *FirstAddr,
+ Value *LastAddr);
+
+ OwningPtr<BlackList> BL;
+ SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ Type *IntptrTy;
+ LLVMContext *C;
};
} // namespace
@@ -243,12 +287,17 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
return new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::PrivateLinkage, StrConst, "");
+ GlobalValue::PrivateLinkage, StrConst,
+ kAsanGenPrefix);
+}
+
+static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
+ return G->getName().find(kAsanGenPrefix) == 0;
}
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
- Shadow = IRB.CreateLShr(Shadow, MappingScale);
+ Shadow = IRB.CreateLShr(Shadow, MappingScale());
if (MappingOffset == 0)
return Shadow;
// (Shadow >> scale) | offset
@@ -328,30 +377,6 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
-void AddressSanitizer::FindDynamicInitializers(Module& M) {
- // Clang generates metadata identifying all dynamically initialized globals.
- NamedMDNode *DynamicGlobals =
- M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
- if (!DynamicGlobals)
- return;
- for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
- MDNode *MDN = DynamicGlobals->getOperand(i);
- assert(MDN->getNumOperands() == 1);
- Value *VG = MDN->getOperand(0);
- // The optimizer may optimize away a global entirely, in which case we
- // cannot instrument access to it.
- if (!VG)
- continue;
-
- GlobalVariable *G = cast<GlobalVariable>(VG);
- DynamicallyInitializedGlobals.insert(G);
- }
-}
-// Returns true if a global variable is initialized dynamically in this TU.
-bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
- return DynamicallyInitializedGlobals.count(G);
-}
-
void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -363,11 +388,9 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
if (!ClInitializers)
return;
// If a global variable does not have dynamic initialization we don't
- // have to instrument it. However, if a global has external linkage, we
- // assume it has dynamic initialization, as it may have an initializer
- // in a different TU.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- !HasDynamicInitializer(G))
+ // have to instrument it. However, if a global does not have an initializer
+ // at all, we assume it has a dynamic initializer (in another TU).
+ if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
return;
}
}
@@ -392,7 +415,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
// function of AddressSanitizer. If the instrumented module defines a function
// with the same name, their prototypes must match, otherwise
// getOrInsertFunction returns a bitcast.
-Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
+static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
FuncOrBitcast->dump();
report_fatal_error("trying to redefine an AddressSanitizer "
@@ -415,7 +438,7 @@ Instruction *AddressSanitizer::generateCrashCode(
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue,
uint32_t TypeSize) {
- size_t Granularity = 1 << MappingScale;
+ size_t Granularity = 1 << MappingScale();
// Addr & (Granularity - 1)
Value *LastAccessedByte = IRB.CreateAnd(
AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
@@ -436,7 +459,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Type *ShadowTy = IntegerType::get(
- *C, std::max(8U, TypeSize >> MappingScale));
+ *C, std::max(8U, TypeSize >> MappingScale()));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
@@ -445,7 +468,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- size_t Granularity = 1 << MappingScale;
+ size_t Granularity = 1 << MappingScale();
TerminatorInst *CrashTerm = 0;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
@@ -469,9 +492,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
-void AddressSanitizer::createInitializerPoisonCalls(Module &M,
- Value *FirstAddr,
- Value *LastAddr) {
+void AddressSanitizerCreateGlobalRedzonesPass::createInitializerPoisonCalls(
+ Module &M, Value *FirstAddr, Value *LastAddr) {
// We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
// If that function is not present, this TU contains no globals, or they have
@@ -502,14 +524,15 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M,
}
}
-bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+bool AddressSanitizerCreateGlobalRedzonesPass::ShouldInstrumentGlobal(
+ GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
if (BL->isIn(*G)) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
- if (GlobalsCreatedByAsan.count(G)) return false; // Our own global.
+ if (GlobalWasGeneratedByAsan(G)) return false; // Our own global.
// Touch only those globals that will not be defined in other modules.
// Don't handle ODR type linkages since other modules may be built w/o asan.
if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
@@ -522,7 +545,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
if (G->isThreadLocal())
return false;
// For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize) return false;
+ if (G->getAlignment() > RedzoneSize()) return false;
// Ignore all the globals with the names starting with "\01L_OBJC_".
// Many of those are put into the .cstring section. The linker compresses
@@ -564,7 +587,13 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizer::insertGlobalRedzones(Module &M) {
+bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M,
+ DataLayout *TD) {
+ BL.reset(new BlackList(ClBlackListFile));
+ DynamicallyInitializedGlobals.Init(M);
+ C = &(M.getContext());
+ IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits());
+
SmallVector<GlobalVariable *, 16> GlobalsToChange;
for (Module::GlobalListType::iterator G = M.global_begin(),
@@ -588,10 +617,10 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
- IRBuilder<> IRB(CtorInsertBefore);
- if (ClInitializers)
- FindDynamicInitializers(M);
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
// The addresses of the first and last dynamically initialized globals in
// this TU. Used in initialization order checking.
@@ -602,11 +631,12 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
- uint64_t RightRedzoneSize = RedzoneSize +
- (RedzoneSize - (SizeInBytes % RedzoneSize));
+ size_t RZ = RedzoneSize();
+ uint64_t RightRedzoneSize = RZ + (RZ - (SizeInBytes % RZ));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
// Determine whether this global should be poisoned in initialization.
- bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
+ bool GlobalHasDynamicInitializer =
+ DynamicallyInitializedGlobals.Contains(G);
// Don't check initialization order if this global is blacklisted.
GlobalHasDynamicInitializer &= !BL->isInInit(*G);
@@ -626,7 +656,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
M, NewTy, G->isConstant(), G->getLinkage(),
NewInitializer, "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
- NewGlobal->setAlignment(RedzoneSize);
+ NewGlobal->setAlignment(RZ);
Value *Indices2[2];
Indices2[0] = IRB.getInt32(0);
@@ -704,6 +734,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!TD)
return false;
BL.reset(new BlackList(ClBlackListFile));
+ DynamicallyInitializedGlobals.Init(M);
C = &(M.getContext());
LongSize = TD->getPointerSizeInBits();
@@ -714,10 +745,8 @@ bool AddressSanitizer::doInitialization(Module &M) {
FunctionType::get(Type::getVoidTy(*C), false),
GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
- CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB);
-
// call __asan_init in the module ctor.
- IRBuilder<> IRB(CtorInsertBefore);
+ IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
AsanInitFunction = checkInterfaceFunction(
M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
AsanInitFunction->setLinkage(Function::ExternalLinkage);
@@ -763,13 +792,6 @@ bool AddressSanitizer::doInitialization(Module &M) {
MappingOffset = 1ULL << ClMappingOffsetLog;
}
}
- MappingScale = kDefaultShadowScale;
- if (ClMappingScale) {
- MappingScale = ClMappingScale;
- }
- // Redzone used for stack and globals is at least 32 bytes.
- // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
- RedzoneSize = std::max(32, (int)(1 << MappingScale));
if (ClMappingOffsetLog >= 0) {
@@ -784,7 +806,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (ClMappingScale) {
GlobalValue *asan_mapping_scale =
new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingScale),
+ ConstantInt::get(IntptrTy, MappingScale()),
kAsanMappingScaleName);
// Read the global, otherwise it may be optimized away.
IRB.CreateLoad(asan_mapping_scale, true);
@@ -798,8 +820,11 @@ bool AddressSanitizer::doInitialization(Module &M) {
bool AddressSanitizer::doFinalization(Module &M) {
// We transform the globals at the very end so that the optimization analysis
// works on the original globals.
- if (ClGlobals)
- return insertGlobalRedzones(M);
+ if (ClGlobals) {
+ // FIXME: instead of doFinalization, run this as a true ModulePass.
+ AddressSanitizerCreateGlobalRedzonesPass Pass;
+ return Pass.runOnModule(M, TD);
+ }
return false;
}
@@ -912,10 +937,10 @@ static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
size_t Size,
- size_t RedzoneSize,
+ size_t RZSize,
size_t ShadowGranularity,
uint8_t Magic) {
- for (size_t i = 0; i < RedzoneSize;
+ for (size_t i = 0; i < RZSize;
i+= ShadowGranularity, Shadow++) {
if (i + ShadowGranularity <= Size) {
*Shadow = 0; // fully addressable
@@ -930,7 +955,7 @@ static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison) {
- size_t ShadowRZSize = RedzoneSize >> MappingScale;
+ size_t ShadowRZSize = RedzoneSize() >> MappingScale();
assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
Type *RZPtrTy = PointerType::get(RZTy, 0);
@@ -946,12 +971,12 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
// poison all other red zones.
- uint64_t Pos = RedzoneSize;
+ uint64_t Pos = RedzoneSize();
for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
AllocaInst *AI = AllocaVec[i];
uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
uint64_t AlignedSize = getAlignedAllocaSize(AI);
- assert(AlignedSize - SizeInBytes < RedzoneSize);
+ assert(AlignedSize - SizeInBytes < RedzoneSize());
Value *Ptr = NULL;
Pos += AlignedSize;
@@ -961,13 +986,13 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
// Poison the partial redzone at right
Ptr = IRB.CreateAdd(
ShadowBase, ConstantInt::get(IntptrTy,
- (Pos >> MappingScale) - ShadowRZSize));
- size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes);
+ (Pos >> MappingScale()) - ShadowRZSize));
+ size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes);
uint32_t Poison = 0;
if (DoPoison) {
PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
- RedzoneSize,
- 1ULL << MappingScale,
+ RedzoneSize(),
+ 1ULL << MappingScale(),
kAsanStackPartialRedzoneMagic);
}
Value *PartialPoison = ConstantInt::get(RZTy, Poison);
@@ -976,11 +1001,11 @@ void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
// Poison the full redzone at right.
Ptr = IRB.CreateAdd(ShadowBase,
- ConstantInt::get(IntptrTy, Pos >> MappingScale));
+ ConstantInt::get(IntptrTy, Pos >> MappingScale()));
Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid;
IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
- Pos += RedzoneSize;
+ Pos += RedzoneSize();
}
}
@@ -1032,7 +1057,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
if (AI->isArrayAllocation()) continue;
if (!AI->isStaticAlloca()) continue;
if (!AI->getAllocatedType()->isSized()) continue;
- if (AI->getAlignment() > RedzoneSize) continue;
+ if (AI->getAlignment() > RedzoneSize()) continue;
AllocaVec.push_back(AI);
uint64_t AlignedSize = getAlignedAllocaSize(AI);
TotalSize += AlignedSize;
@@ -1041,7 +1066,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
if (AllocaVec.empty()) return false;
- uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize;
+ uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize();
bool DoStackMalloc = ClUseAfterReturn
&& LocalStackSize <= kMaxStackMallocSize;
@@ -1053,7 +1078,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
AllocaInst *MyAlloca =
new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
- MyAlloca->setAlignment(RedzoneSize);
+ MyAlloca->setAlignment(RedzoneSize());
assert(MyAlloca->isStaticAlloca());
Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
Value *LocalStackBase = OrigStackBase;
@@ -1068,7 +1093,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
raw_svector_ostream StackDescription(StackDescriptionStorage);
StackDescription << F.getName() << " " << AllocaVec.size() << " ";
- uint64_t Pos = RedzoneSize;
+ uint64_t Pos = RedzoneSize();
// Replace Alloca instructions with base+offset.
for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
AllocaInst *AI = AllocaVec[i];
@@ -1077,12 +1102,12 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
StackDescription << Pos << " " << SizeInBytes << " "
<< Name.size() << " " << Name << " ";
uint64_t AlignedSize = getAlignedAllocaSize(AI);
- assert((AlignedSize % RedzoneSize) == 0);
+ assert((AlignedSize % RedzoneSize()) == 0);
AI->replaceAllUsesWith(
IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
AI->getType()));
- Pos += AlignedSize + RedzoneSize;
+ Pos += AlignedSize + RedzoneSize();
}
assert(Pos == LocalStackSize);
@@ -1095,7 +1120,6 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), StackDescription.str());
- GlobalsCreatedByAsan.insert(StackDescriptionGlobal);
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index ef34b8a56d..e02c631f7f 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Module.h"
@@ -92,12 +93,25 @@ bool BlackList::isIn(const Module &M) {
return inSection("src", M.getModuleIdentifier());
}
+static StringRef GetGVTypeString(const GlobalVariable &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
bool BlackList::isInInit(const GlobalVariable &G) {
- return isIn(*G.getParent()) || inSection("global-init", G.getName());
+ return (isIn(*G.getParent()) ||
+ inSection("global-init", G.getName()) ||
+ inSection("global-init-type", GetGVTypeString(G)));
}
bool BlackList::inSection(const StringRef Section,
- const StringRef Query) {
+ const StringRef Query) {
Regex *FunctionRegex = Entries[Section];
return FunctionRegex ? FunctionRegex->match(Query) : false;
}
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
index f3c05a5058..ee18a98567 100644
--- a/lib/Transforms/Instrumentation/BlackList.h
+++ b/lib/Transforms/Instrumentation/BlackList.h
@@ -18,6 +18,7 @@
// fun:*_ZN4base6subtle*
// global:*global_with_bad_access_or_initialization*
// global-init:*global_with_initialization_issues*
+// global-init-type:*Namespace::ClassName*
// src:file_with_tricky_code.cc
// ---
// Note that the wild card is in fact an llvm::Regex, but * is automatically
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 7810b1b8a3..2d5d5603c0 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -41,7 +41,7 @@ namespace {
struct BoundsChecking : public FunctionPass {
static char ID;
- BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){
+ BoundsChecking() : FunctionPass(ID) {
initializeBoundsCheckingPass(*PassRegistry::getPassRegistry());
}
@@ -59,7 +59,6 @@ namespace {
BuilderTy *Builder;
Instruction *Inst;
BasicBlock *TrapBB;
- unsigned Penalty;
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = 0);
@@ -208,6 +207,6 @@ bool BoundsChecking::runOnFunction(Function &F) {
return MadeChange;
}
-FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) {
- return new BoundsChecking(Penalty);
+FunctionPass *llvm::createBoundsCheckingPass() {
+ return new BoundsChecking();
}
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e9192e5cdd..a8adaa62d7 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -540,13 +540,13 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
// read it. Threads and invoke make this untrue.
// emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
+ size_t TableSize = Succs.size() * Preds.size();
Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- ArrayType *EdgeTableTy = ArrayType::get(
- Int64PtrTy, Succs.size() * Preds.size());
+ ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
- Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()];
+ OwningArrayPtr<Constant *> EdgeTable(new Constant*[TableSize]);
Constant *NullValue = Constant::getNullValue(Int64PtrTy);
- for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i)
+ for (size_t i = 0; i != TableSize; ++i)
EdgeTable[i] = NullValue;
unsigned Edge = 0;
@@ -566,7 +566,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
Edge += Successors;
}
- ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
+ ArrayRef<Constant*> V(&EdgeTable[0], TableSize);
GlobalVariable *EdgeTableGV =
new GlobalVariable(
*M, EdgeTableTy, true, GlobalValue::InternalLinkage,
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 9e10fc4416..d054b5e22f 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -198,7 +198,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, NULL));
+ AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, NULL));
}
TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
"__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
@@ -391,7 +391,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
case NotAtomic: assert(false);
case Unordered: // Fall-through.
case Monotonic: v = 0; break;
- // case Consume: v = 1; break; // Not specified yet.
+ // case Consume: v = 1; break; // Not specified yet.
case Acquire: v = 2; break;
case Release: v = 3; break;
case AcquireRelease: v = 4; break;
@@ -400,6 +400,29 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
return IRB->getInt32(v);
}
+static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+ uint32_t v = 0;
+ switch (ord) {
+ case NotAtomic: assert(false);
+ case Unordered: // Fall-through.
+ case Monotonic: v = 0; break;
+ // case Consume: v = 1; break; // Not specified yet.
+ case Acquire: v = 2; break;
+ case Release: v = 0; break;
+ case AcquireRelease: v = 2; break;
+ case SequentiallyConsistent: v = 5; break;
+ }
+ return IRB->getInt32(v);
+}
+
+// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
+// standards. For background see C++11 standard. A slightly older, publicly
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
IRBuilder<> IRB(I);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -461,7 +484,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
- createOrdering(&IRB, CASI->getOrdering())};
+ createOrdering(&IRB, CASI->getOrdering()),
+ createFailOrdering(&IRB, CASI->getOrdering())};
CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
ReplaceInstWithInst(I, C);
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 123ed0f4f3..b608a5535e 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -125,7 +125,7 @@ namespace {
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool OptimizeSelectInst(SelectInst *SI);
- bool DupRetToEnableTailCallOpts(ReturnInst *RI);
+ bool DupRetToEnableTailCallOpts(BasicBlock *BB);
bool PlaceDbgValues(Function &F);
};
}
@@ -689,10 +689,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI)
+ return false;
+
PHINode *PN = 0;
BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
@@ -706,7 +710,6 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
return false;
}
- BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
return false;
@@ -1319,9 +1322,6 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
- if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
- return DupRetToEnableTailCallOpts(RI);
-
if (SelectInst *SI = dyn_cast<SelectInst>(I))
return OptimizeSelectInst(SI);
@@ -1339,6 +1339,8 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
while (CurInstIterator != BB.end())
MadeChange |= OptimizeInst(CurInstIterator++);
+ MadeChange |= DupRetToEnableTailCallOpts(&BB);
+
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/NaClCcRewrite.cpp b/lib/Transforms/Scalar/NaClCcRewrite.cpp
index 5eace7f39d..4ac361835d 100644
--- a/lib/Transforms/Scalar/NaClCcRewrite.cpp
+++ b/lib/Transforms/Scalar/NaClCcRewrite.cpp
@@ -596,7 +596,7 @@ void UpdateFunctionSignature(Function &F,
Attributes fattr = F.getAttributes().getFnAttributes();
if (fattr.hasAttributes())
new_attributes_vec.push_back(AttributeWithIndex::get(~0, fattr));
- F.setAttributes(AttrListPtr::get(new_attributes_vec));
+ F.setAttributes(AttrListPtr::get(F.getContext(), new_attributes_vec));
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 09687d8909..377c07020d 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -339,36 +339,6 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
}
}
-/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C
-/// is repeated Weight times.
-static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C,
- APInt Weight) {
- // For addition the result can be efficiently computed as the product of the
- // constant and the weight.
- if (Opcode == Instruction::Add)
- return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight));
-
- // The weight might be huge, so compute by repeated squaring to ensure that
- // compile time is proportional to the logarithm of the weight.
- Constant *Result = 0;
- Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc.
- // Visit the bits in Weight.
- while (Weight != 0) {
- // If the current bit in Weight is non-zero do Result = Result op Power.
- if (Weight[0])
- Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power;
- // Move on to the next bit if any more are non-zero.
- Weight = Weight.lshr(1);
- if (Weight.isMinValue())
- break;
- // Square the power.
- Power = ConstantExpr::get(Opcode, Power, Power);
- }
-
- assert(Result && "Only positive weights supported!");
- return Result;
-}
-
typedef std::pair<Value*, APInt> RepeatedValue;
/// LinearizeExprTree - Given an associative binary expression, return the leaf
@@ -382,9 +352,7 @@ typedef std::pair<Value*, APInt> RepeatedValue;
/// op
/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
///
-/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and
-/// they are all non-constant except possibly for the last one, which if it is
-/// constant will have weight one (Ops[N].second === 1).
+/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
///
/// This routine may modify the function, in which case it returns 'true'. The
/// changes it makes may well be destructive, changing the value computed by 'I'
@@ -455,10 +423,6 @@ static bool LinearizeExprTree(BinaryOperator *I,
assert(Instruction::isAssociative(Opcode) &&
Instruction::isCommutative(Opcode) &&
"Expected an associative and commutative operation!");
- // If we see an absorbing element then the entire expression must be equal to
- // it. For example, if this is a multiplication expression and zero occurs as
- // an operand somewhere in it then the result of the expression must be zero.
- Constant *Absorber = ConstantExpr::getBinOpAbsorber(Opcode, I->getType());
// Visit all operands of the expression, keeping track of their weight (the
// number of paths from the expression root to the operand, or if you like
@@ -506,13 +470,6 @@ static bool LinearizeExprTree(BinaryOperator *I,
DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
assert(!Op->use_empty() && "No uses, so how did we get to it?!");
- // If the expression contains an absorbing element then there is no need
- // to analyze it further: it must evaluate to the absorbing element.
- if (Op == Absorber && !Weight.isMinValue()) {
- Ops.push_back(std::make_pair(Absorber, APInt(Bitwidth, 1)));
- return MadeChange;
- }
-
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
@@ -604,7 +561,6 @@ static bool LinearizeExprTree(BinaryOperator *I,
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
- Constant *Cst = 0; // Accumulate constants here.
for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
Value *V = LeafOrder[i];
LeafMap::iterator It = Leaves.find(V);
@@ -618,31 +574,14 @@ static bool LinearizeExprTree(BinaryOperator *I,
continue;
// Ensure the leaf is only output once.
It->second = 0;
- // Glob all constants together into Cst.
- if (Constant *C = dyn_cast<Constant>(V)) {
- C = EvaluateRepeatedConstant(Opcode, C, Weight);
- Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C;
- continue;
- }
- // Add non-constant
Ops.push_back(std::make_pair(V, Weight));
}
- // Add any constants back into Ops, all globbed together and reduced to having
- // weight 1 for the convenience of users.
- Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
- if (Cst && Cst != Identity) {
- // If combining multiple constants resulted in the absorber then the entire
- // expression must evaluate to the absorber.
- if (Cst == Absorber)
- Ops.clear();
- Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1)));
- }
-
// For nilpotent operations or addition there may be no operands, for example
// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
// in both cases the weight reduces to 0 causing the value to be skipped.
if (Ops.empty()) {
+ Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1)));
}
@@ -656,8 +595,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
assert(Ops.size() > 1 && "Single values should be used directly!");
- // Since our optimizations never increase the number of operations, the new
- // expression can always be written by reusing the existing binary operators
+ // Since our optimizations should never increase the number of operations, the
+ // new expression can usually be written reusing the existing binary operators
// from the original expression tree, without creating any new instructions,
// though the rewritten expression may have a completely different topology.
// We take care to not change anything if the new expression will be the same
@@ -671,6 +610,20 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
unsigned Opcode = I->getOpcode();
BinaryOperator *Op = I;
+ /// NotRewritable - The operands being written will be the leaves of the new
+ /// expression and must not be used as inner nodes (via NodesToRewrite) by
+ /// mistake. Inner nodes are always reassociable, and usually leaves are not
+ /// (if they were they would have been incorporated into the expression and so
+ /// would not be leaves), so most of the time there is no danger of this. But
+ /// in rare cases a leaf may become reassociable if an optimization kills uses
+ /// of it, or it may momentarily become reassociable during rewriting (below)
+ /// due to it being removed as an operand of one of its uses. Ensure that misuse
+ /// of leaf nodes as inner nodes cannot occur by remembering all of the future
+ /// leaves and refusing to reuse any of them as inner nodes.
+ SmallPtrSet<Value*, 8> NotRewritable;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ NotRewritable.insert(Ops[i].Op);
+
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
@@ -703,12 +656,14 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// the old operands with the new ones.
DEBUG(dbgs() << "RA: " << *Op << '\n');
if (NewLHS != OldLHS) {
- if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode))
+ BinaryOperator *BO = isReassociableOp(OldLHS, Opcode);
+ if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(0, NewLHS);
}
if (NewRHS != OldRHS) {
- if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode))
+ BinaryOperator *BO = isReassociableOp(OldRHS, Opcode);
+ if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
}
@@ -732,7 +687,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Op->swapOperands();
} else {
// Overwrite with the new right-hand side.
- if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode))
+ BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode);
+ if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
ExpressionChanged = Op;
@@ -745,7 +701,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// Now deal with the left-hand side. If this is already an operation node
// from the original expression then just rewrite the rest of the expression
// into it.
- if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) {
+ BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode);
+ if (BO && !NotRewritable.count(BO)) {
Op = BO;
continue;
}
@@ -1446,9 +1403,26 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- if (Ops.size() == 1) return Ops[0].Op;
-
+ Constant *Cst = 0;
unsigned Opcode = I->getOpcode();
+ while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
+ Constant *C = cast<Constant>(Ops.pop_back_val().Op);
+ Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
+ }
+ // If there was nothing but constants then we are done.
+ if (Ops.empty())
+ return Cst;
+
+ // Put the combined constant back at the end of the operand list, except if
+ // there is no point. For example, an add of 0 gets dropped here, while a
+ // multiplication by zero turns the whole expression into zero.
+ if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) {
+ if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType()))
+ return Cst;
+ Ops.push_back(ValueEntry(0, Cst));
+ }
+
+ if (Ops.size() == 1) return Ops[0].Op;
// Handle destructive annihilation due to identities between elements in the
// argument list here.
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index d95c855ce7..8284e144b2 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -334,7 +334,7 @@ private:
class UseBuilder;
friend class AllocaPartitioning::UseBuilder;
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
@@ -568,6 +568,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
+ // NOTE! This may appear superficially to be something we could ignore
+ // entirely, but that is not so! There may be PHI-node uses where some
+ // instructions are dead but not others. We can't completely ignore the
+ // PHI node, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -1122,7 +1126,7 @@ void AllocaPartitioning::splitAndMergePartitions() {
AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
:
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(0) {
@@ -1382,11 +1386,7 @@ class SROA : public FunctionPass {
/// \brief A collection of instructions to delete.
/// We try to batch deletions to simplify code and make things a bit more
/// efficient.
- SmallVector<Instruction *, 8> DeadInsts;
-
- /// \brief A set to prevent repeatedly marking an instruction split into many
- /// uses as dead. Only used to guard insertion into DeadInsts.
- SmallPtrSet<Instruction *, 4> DeadSplitInsts;
+ SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
/// \brief Post-promotion worklist.
///
@@ -1573,7 +1573,7 @@ private:
do {
LoadInst *LI = Loads.pop_back_val();
LI->replaceAllUsesWith(NewPN);
- Pass.DeadInsts.push_back(LI);
+ Pass.DeadInsts.insert(LI);
} while (!Loads.empty());
// Inject loads into all of the pred blocks.
@@ -1717,7 +1717,7 @@ private:
DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
- Pass.DeadInsts.push_back(LI);
+ Pass.DeadInsts.insert(LI);
}
}
};
@@ -2116,11 +2116,11 @@ static bool isVectorPromotionViable(const DataLayout &TD,
EndIndex > Ty->getNumElements())
return false;
- // FIXME: We should build shuffle vector instructions to handle
- // non-element-sized accesses.
- if ((EndOffset - BeginOffset) != ElementSize &&
- (EndOffset - BeginOffset) != VecSize)
- return false;
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ uint64_t NumElements = EndIndex - BeginIndex;
+ Type *PartitionTy
+ = (NumElements == 1) ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
if (MI->isVolatile())
@@ -2134,8 +2134,17 @@ static bool isVectorPromotionViable(const DataLayout &TD,
} else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (!isa<LoadInst>(I->U->getUser()) &&
- !isa<StoreInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LI->isVolatile())
+ return false;
+ if (!canConvertValue(TD, PartitionTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ if (SI->isVolatile())
+ return false;
+ if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
+ return false;
+ } else {
return false;
}
}
@@ -2241,18 +2250,23 @@ static bool isIntegerWideningViable(const DataLayout &TD,
static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
+ DEBUG(dbgs() << " start: " << *V << "\n");
IntegerType *IntTy = cast<IntegerType>(V->getType());
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element extends past full value");
uint64_t ShAmt = 8*Offset;
if (DL.isBigEndian())
ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
- if (ShAmt)
+ if (ShAmt) {
V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot extract to a larger integer!");
- if (Ty != IntTy)
+ if (Ty != IntTy) {
V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+ DEBUG(dbgs() << " trunced: " << *V << "\n");
+ }
return V;
}
@@ -2262,20 +2276,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
IntegerType *Ty = cast<IntegerType>(V->getType());
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
- if (Ty != IntTy)
+ DEBUG(dbgs() << " start: " << *V << "\n");
+ if (Ty != IntTy) {
V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+ DEBUG(dbgs() << " extended: " << *V << "\n");
+ }
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element store outside of alloca store");
uint64_t ShAmt = 8*Offset;
if (DL.isBigEndian())
ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
- if (ShAmt)
+ if (ShAmt) {
V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+ DEBUG(dbgs() << " shifted: " << *V << "\n");
+ }
if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+ DEBUG(dbgs() << " masked: " << *Old << "\n");
V = IRB.CreateOr(Old, V, Name + ".insert");
+ DEBUG(dbgs() << " inserted: " << *V << "\n");
}
return V;
}
@@ -2430,42 +2451,47 @@ private:
return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
}
- ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
+ unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
- return IRB.getInt32(Index);
+ return Index;
}
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
- Pass.DeadInsts.push_back(I);
+ Pass.DeadInsts.insert(I);
}
- bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
- Value *Result;
- if (LI.getType() == VecTy->getElementType() ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- Result = IRB.CreateExtractElement(
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- getIndex(IRB, BeginOffset), getName(".extract"));
- } else {
- Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ getName(".extract"));
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ } else if (NumElements < VecTy->getNumElements()) {
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ getName(".extract"));
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
}
- if (Result->getType() != LI.getType())
- Result = convertValue(TD, IRB, Result, LI.getType());
- LI.replaceAllUsesWith(Result);
- Pass.DeadInsts.push_back(&LI);
-
- DEBUG(dbgs() << " to: " << *Result << "\n");
- return true;
+ return V;
}
- bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2473,12 +2499,10 @@ private:
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
- LI.replaceAllUsesWith(V);
- Pass.DeadInsts.push_back(&LI);
- DEBUG(dbgs() << " to: " << *V << "\n");
- return true;
+ if (Offset > 0 || EndOffset < NewAllocaEndOffset)
+ V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ getName(".extract"));
+ return V;
}
bool visitLoadInst(LoadInst &LI) {
@@ -2488,7 +2512,46 @@ private:
IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- if (Size < TD.getTypeStoreSize(LI.getType())) {
+ bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the original allocation its behavior is undefined. Rather
+ // than trying to transform it, just replace it with undef.
+ // FIXME: We should do something more clever for functions being
+ // instrumented by asan.
+ // FIXME: Eventually, once ASan and friends can flush out bugs here, this
+ // should be transformed to a load of null making it unreachable.
+ uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
+ if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
+ LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
+ Pass.DeadInsts.insert(&LI);
+ deleteIfTriviallyDead(OldOp);
+ DEBUG(dbgs() << " to: undef!!\n");
+ return true;
+ }
+
+ Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
+ bool IsPtrAdjusted = false;
+ Value *V;
+ if (VecTy) {
+ V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ } else if (IntTy && LI.getType()->isIntegerTy()) {
+ V = rewriteIntegerLoad(IRB, LI);
+ } else if (BeginOffset == NewAllocaBeginOffset &&
+ canConvertValue(TD, NewAllocaTy, LI.getType())) {
+ V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LI.isVolatile(), getName(".load"));
+ } else {
+ Type *LTy = TargetTy->getPointerTo();
+ V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
+ getPartitionTypeAlign(TargetTy),
+ LI.isVolatile(), getName(".load"));
+ IsPtrAdjusted = true;
+ }
+ V = convertValue(TD, IRB, V, TargetTy);
+
+ if (IsSplitIntLoad) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
@@ -2498,21 +2561,8 @@ private:
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide loads can be split and recomposed");
- IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
- bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
- canConvertValue(TD, NewAllocaTy, NarrowTy);
- Value *V;
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
- if (IsConvertable)
- V = convertValue(TD, IRB,
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load")),
- NarrowTy);
- else
- V = IRB.CreateAlignedLoad(
- getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
- getPartitionTypeAlign(NarrowTy), getName(".load"));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -2524,67 +2574,77 @@ private:
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
- if (Pass.DeadSplitInsts.insert(&LI))
- Pass.DeadInsts.push_back(&LI);
- DEBUG(dbgs() << " to: " << *V << "\n");
- return IsConvertable;
- }
-
- if (VecTy)
- return rewriteVectorizedLoadInst(IRB, LI, OldOp);
- if (IntTy && LI.getType()->isIntegerTy())
- return rewriteIntegerLoad(IRB, LI);
-
- if (BeginOffset == NewAllocaBeginOffset &&
- canConvertValue(TD, NewAllocaTy, LI.getType())) {
- Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
- Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
- LI.replaceAllUsesWith(NewV);
- Pass.DeadInsts.push_back(&LI);
-
- DEBUG(dbgs() << " to: " << *NewLI << "\n");
- return !LI.isVolatile();
+ } else {
+ LI.replaceAllUsesWith(V);
}
- assert(!IntTy && "Invalid load found with int-op widening enabled");
-
- Value *NewPtr = getAdjustedAllocaPtr(IRB,
- LI.getPointerOperand()->getType());
- LI.setOperand(0, NewPtr);
- LI.setAlignment(getPartitionTypeAlign(LI.getType()));
- DEBUG(dbgs() << " to: " << LI << "\n");
-
+ Pass.DeadInsts.insert(&LI);
deleteIfTriviallyDead(OldOp);
- return NewPtr == &NewAI && !LI.isVolatile();
- }
-
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI,
- Value *OldOp) {
- Value *V = SI.getValueOperand();
- if (V->getType() == ElementTy ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- if (V->getType() != ElementTy)
- V = convertValue(TD, IRB, V, ElementTy);
+ DEBUG(dbgs() << " to: " << *V << "\n");
+ return !LI.isVolatile() && !IsPtrAdjusted;
+ }
+
+ bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ StoreInst &SI, Value *OldOp) {
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ Type *PartitionTy
+ = (NumElements == 1) ? ElementTy
+ : VectorType::get(ElementTy, NumElements);
+ if (V->getType() != PartitionTy)
+ V = convertValue(TD, IRB, V, PartitionTy);
+ if (NumElements < VecTy->getNumElements()) {
+ // We need to mix in the existing elements.
LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
- V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
- getName(".insert"));
- } else if (V->getType() != VecTy) {
+ if (NumElements == 1) {
+ V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex),
+ getName(".insert"));
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ } else {
+ // When inserting a smaller vector into the larger to store, we first
+ // use a shuffle vector to widen it with undef elements, and then
+ // a second shuffle vector to select between the loaded vector and the
+ // incoming vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ getName(".expand"));
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask),
+ getName("insert"));
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ }
+ } else {
V = convertValue(TD, IRB, V, VecTy);
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Pass.DeadInsts.push_back(&SI);
+ Pass.DeadInsts.insert(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+ bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
- Value *V = SI.getValueOperand();
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".oldload"));
@@ -2596,7 +2656,7 @@ private:
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Pass.DeadInsts.push_back(&SI);
+ Pass.DeadInsts.insert(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@@ -2608,74 +2668,53 @@ private:
assert(OldOp == OldPtr);
IRBuilder<> IRB(&SI);
- if (VecTy)
- return rewriteVectorizedStoreInst(IRB, SI, OldOp);
- Type *ValueTy = SI.getValueOperand()->getType();
+ Value *V = SI.getValueOperand();
+
+ // Strip all inbounds GEPs and pointer casts to try to dig out any root
+ // alloca that should be re-examined after promoting this alloca.
+ if (V->getType()->isPointerTy())
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
+ Pass.PostPromotionWorklist.insert(AI);
uint64_t Size = EndOffset - BeginOffset;
- if (Size < TD.getTypeStoreSize(ValueTy)) {
+ if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
- assert(ValueTy->isIntegerTy() &&
+ assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
- assert(ValueTy->getIntegerBitWidth() ==
- TD.getTypeStoreSizeInBits(ValueTy) &&
+ assert(V->getType()->getIntegerBitWidth() ==
+ TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(ValueTy->getIntegerBitWidth() ==
+ assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
- Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
- BeginOffset, getName(".extract"));
- StoreInst *NewSI;
- bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
- canConvertValue(TD, NarrowTy, NewAllocaTy);
- if (IsConvertable)
- NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
- &NewAI, NewAI.getAlignment());
- else
- NewSI = IRB.CreateAlignedStore(
- V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
- getPartitionTypeAlign(NarrowTy));
- (void)NewSI;
- if (Pass.DeadSplitInsts.insert(&SI))
- Pass.DeadInsts.push_back(&SI);
-
- DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return IsConvertable;
+ V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+ getName(".extract"));
}
- if (IntTy && ValueTy->isIntegerTy())
- return rewriteIntegerStore(IRB, SI);
-
- // Strip all inbounds GEPs and pointer casts to try to dig out any root
- // alloca that should be re-examined after promoting this alloca.
- if (ValueTy->isPointerTy())
- if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
- ->stripInBoundsOffsets()))
- Pass.PostPromotionWorklist.insert(AI);
+ if (VecTy)
+ return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ if (IntTy && V->getType()->isIntegerTy())
+ return rewriteIntegerStore(IRB, V, SI);
+ StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
- canConvertValue(TD, ValueTy, NewAllocaTy)) {
- Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
- StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
- SI.isVolatile());
- (void)NewSI;
- Pass.DeadInsts.push_back(&SI);
-
- DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return !SI.isVolatile();
+ canConvertValue(TD, V->getType(), NewAllocaTy)) {
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ SI.isVolatile());
+ } else {
+ Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(V, NewPtr,
+ getPartitionTypeAlign(V->getType()),
+ SI.isVolatile());
}
-
- assert(!IntTy && "Invalid store found with int-op widening enabled");
-
- Value *NewPtr = getAdjustedAllocaPtr(IRB,
- SI.getPointerOperand()->getType());
- SI.setOperand(1, NewPtr);
- SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType()));
- DEBUG(dbgs() << " to: " << SI << "\n");
-
+ (void)NewSI;
+ Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
- return NewPtr == &NewAI && !SI.isVolatile();
+
+ DEBUG(dbgs() << " to: " << *NewSI << "\n");
+ return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
bool visitMemSetInst(MemSetInst &II) {
@@ -2695,8 +2734,7 @@ private:
}
// Record this instruction for deletion.
- if (Pass.DeadSplitInsts.insert(&II))
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.insert(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
@@ -2747,7 +2785,7 @@ private:
IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
NewAI.getAlignment(),
getName(".load")),
- V, getIndex(IRB, BeginOffset),
+ V, IRB.getInt32(getIndex(BeginOffset)),
getName(".insert")),
&NewAI, NewAI.getAlignment());
(void)Store;
@@ -2852,8 +2890,7 @@ private:
return false;
}
// Record this instruction for deletion.
- if (Pass.DeadSplitInsts.insert(&II))
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.insert(&II);
bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
EndOffset == NewAllocaEndOffset;
@@ -2916,7 +2953,7 @@ private:
// We have to extract rather than load.
Src = IRB.CreateExtractElement(
IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
- getIndex(IRB, BeginOffset),
+ IRB.getInt32(getIndex(BeginOffset)),
getName(".copyextract"));
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2944,7 +2981,7 @@ private:
// We have to insert into a loaded copy before storing.
Src = IRB.CreateInsertElement(
IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- Src, getIndex(IRB, BeginOffset),
+ Src, IRB.getInt32(getIndex(BeginOffset)),
getName(".insert"));
}
@@ -2963,8 +3000,7 @@ private:
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
- if (Pass.DeadSplitInsts.insert(&II))
- Pass.DeadInsts.push_back(&II);
+ Pass.DeadInsts.insert(&II);
ConstantInt *Size
= ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
@@ -3533,7 +3569,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
DI != DE; ++DI) {
Changed = true;
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
- DeadInsts.push_back(*DI);
+ DeadInsts.insert(*DI);
}
for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
DE = P.dead_op_end();
@@ -3544,7 +3580,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
Changed = true;
- DeadInsts.push_back(OldI);
+ DeadInsts.insert(OldI);
}
}
@@ -3565,7 +3601,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
- DeadSplitInsts.clear();
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.pop_back_val();
DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
@@ -3577,7 +3612,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
// Zero out the operand and see if it becomes trivially dead.
*OI = 0;
if (isInstructionTriviallyDead(U))
- DeadInsts.push_back(U);
+ DeadInsts.insert(U);
}
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 17d07cdb2d..0788f19014 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -19,7 +19,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/IRBuilder.h"
-#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@@ -39,10 +38,6 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of library calls simplified");
STATISTIC(NumAnnotated, "Number of attributes added to library functions");
-static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
- cl::init(false),
- cl::desc("Enable unsafe double to float "
- "shrinking for math lib calls"));
//===----------------------------------------------------------------------===//
// Optimizer Base Class
//===----------------------------------------------------------------------===//
@@ -101,398 +96,10 @@ static bool CallHasFloatingPointArgument(const CallInst *CI) {
namespace {
//===----------------------------------------------------------------------===//
-// Math Library Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
-
-struct UnaryDoubleFPOpt : public LibCallOptimization {
- bool CheckRetType;
- UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
- !FT->getParamType(0)->isDoubleTy())
- return 0;
-
- if (CheckRetType) {
- // Check if all the uses for function like 'sin' are converted to float.
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
- ++UseI) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
- if (Cast == 0 || !Cast->getType()->isFloatTy())
- return 0;
- }
- }
-
- // If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
-
- // floor((double)floatval) -> (double)floorf(floatval)
- Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
- return B.CreateFPExt(V, B.getDoubleTy());
- }
-};
-
-//===---------------------------------------===//
-// 'cos*' Optimizations
-struct CosOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- Value *Ret = NULL;
- if (UnsafeFPShrink && Callee->getName() == "cos" &&
- TLI->has(LibFunc::cosf)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
- }
-
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
- // cos(-x) -> cos(x)
- Value *Op1 = CI->getArgOperand(0);
- if (BinaryOperator::isFNeg(Op1)) {
- BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
- return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
- }
- return Ret;
- }
-};
-
-//===---------------------------------------===//
-// 'pow*' Optimizations
-
-struct PowOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- Value *Ret = NULL;
- if (UnsafeFPShrink && Callee->getName() == "pow" &&
- TLI->has(LibFunc::powf)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
- }
-
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 2 arguments of the same FP type, which match the
- // result type.
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
- Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
- return Op1C;
- if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
- return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
- }
-
- ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return Ret;
-
- if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
- return ConstantFP::get(CI->getType(), 1.0);
-
- if (Op2C->isExactlyValue(0.5)) {
- // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
- // This is faster than calling pow, and still handles negative zero
- // and negative infinity correctly.
- // TODO: In fast-math mode, this could be just sqrt(x).
- // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
- Value *Inf = ConstantFP::getInfinity(CI->getType());
- Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
- Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
- Callee->getAttributes());
- Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
- Callee->getAttributes());
- Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
- Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
- return Sel;
- }
-
- if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
- return Op1;
- if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
- return B.CreateFMul(Op1, Op1, "pow2");
- if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
- return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
- Op1, "powrecip");
- return 0;
- }
-};
-
-//===---------------------------------------===//
-// 'exp2' Optimizations
-
-struct Exp2Opt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- Value *Ret = NULL;
- if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2)) {
- UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
- Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
- }
-
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has 1 argument of FP type, which matches the
- // result type.
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isFloatingPointTy())
- return Ret;
-
- Value *Op = CI->getArgOperand(0);
- // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
- // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- Value *LdExpArg = 0;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
-
- if (LdExpArg) {
- const char *Name;
- if (Op->getType()->isFloatTy())
- Name = "ldexpf";
- else if (Op->getType()->isDoubleTy())
- Name = "ldexp";
- else
- Name = "ldexpl";
-
- Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(),
- B.getInt32Ty(), NULL);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
- }
- return Ret;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// Integer Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// 'ffs*' Optimizations
-
-struct FFSOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // Just make sure this has exactly 1 argument of integer type and an i32
- // result type.
- if (FT->getNumParams() != 1 ||
- !FT->getReturnType()->isIntegerTy(32) ||
- !FT->getParamType(0)->isIntegerTy())
- return 0;
-
- Value *Op = CI->getArgOperand(0);
-
- // Constant fold.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- if (CI->isZero()) // ffs(0) -> 0.
- return B.getInt32(0);
- // ffs(c) -> cttz(c)+1
- return B.getInt32(CI->getValue().countTrailingZeros() + 1);
- }
-
- // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(Callee->getParent(),
- Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
- V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
- V = B.CreateIntCast(V, B.getInt32Ty(), false);
-
- Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
- return B.CreateSelect(Cond, V, B.getInt32(0));
- }
-};
-
-//===---------------------------------------===//
-// 'isdigit' Optimizations
-
-struct IsDigitOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
- return 0;
-
- // isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
- Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
- return B.CreateZExt(Op, CI->getType());
- }
-};
-
-//===---------------------------------------===//
-// 'isascii' Optimizations
-
-struct IsAsciiOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(i32)
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- !FT->getParamType(0)->isIntegerTy(32))
- return 0;
-
- // isascii(c) -> c <u 128
- Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
- return B.CreateZExt(Op, CI->getType());
- }
-};
-
-//===---------------------------------------===//
-// 'abs', 'labs', 'llabs' Optimizations
-
-struct AbsOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require integer(integer) where the types agree.
- if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
- FT->getParamType(0) != FT->getReturnType())
- return 0;
-
- // abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
- "ispos");
- Value *Neg = B.CreateNeg(Op, "neg");
- return B.CreateSelect(Pos, Op, Neg);
- }
-};
-
-
-//===---------------------------------------===//
-// 'toascii' Optimizations
-
-struct ToAsciiOpt : public LibCallOptimization {
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- FunctionType *FT = Callee->getFunctionType();
- // We require i32(i32)
- if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
- !FT->getParamType(0)->isIntegerTy(32))
- return 0;
-
- // toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getArgOperand(0),
- ConstantInt::get(CI->getType(),0x7F));
- }
-};
-
-//===----------------------------------------------------------------------===//
// Formatting and IO Optimizations
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
-// 'printf' Optimizations
-
-struct PrintFOpt : public LibCallOptimization {
- Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
- IRBuilder<> &B) {
- // Check for a fixed format string.
- StringRef FormatStr;
- if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
- return 0;
-
- // Empty format string -> noop.
- if (FormatStr.empty()) // Tolerate printf's declared void.
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 0);
-
- // Do not do any of the following transformations if the printf return value
- // is used, in general the printf return value is not compatible with either
- // putchar() or puts().
- if (!CI->use_empty())
- return 0;
-
- // printf("x") -> putchar('x'), even for '%'.
- if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
-
- // printf("foo\n") --> puts("foo")
- if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == std::string::npos) { // no format characters.
- // Create a string literal with no \n on it. We expect the constant merge
- // pass to be run after this pass, to merge duplicate strings.
- FormatStr = FormatStr.drop_back();
- Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, TD, TLI);
- return (CI->use_empty() || !NewCI) ?
- NewCI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
- }
-
- // Optimize specific format strings.
- // printf("%c", chr) --> putchar(chr)
- if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
-
- if (CI->use_empty() || !Res) return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
-
- // printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
- }
- return 0;
- }
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Require one fixed pointer argument and an integer/void result.
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
- !(FT->getReturnType()->isIntegerTy() ||
- FT->getReturnType()->isVoidTy()))
- return 0;
-
- if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
- return V;
- }
-
- // printf(format, ...) -> iprintf(format, ...) if no floating point
- // arguments.
- if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *IPrintFFn =
- M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
- CallInst *New = cast<CallInst>(CI->clone());
- New->setCalledFunction(IPrintFFn);
- B.Insert(New);
- return New;
- }
- return 0;
- }
-};
-
-//===---------------------------------------===//
// 'sprintf' Optimizations
struct SPrintFOpt : public LibCallOptimization {
@@ -768,22 +375,15 @@ namespace {
TargetLibraryInfo *TLI;
StringMap<LibCallOptimization*> Optimizations;
- // Math Library Optimizations
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
- UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
- // Integer Optimizations
- FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
- ToAsciiOpt ToAscii;
// Formatting and IO Optimizations
- SPrintFOpt SPrintF; PrintFOpt PrintF;
+ SPrintFOpt SPrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
PutsOpt Puts;
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), UnaryDoubleFP(false),
- UnsafeUnaryDoubleFP(true) {
+ SimplifyLibCalls() : FunctionPass(ID) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
@@ -833,73 +433,8 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
- // Math Library Optimizations
- Optimizations["cosf"] = &Cos;
- Optimizations["cos"] = &Cos;
- Optimizations["cosl"] = &Cos;
- Optimizations["powf"] = &Pow;
- Optimizations["pow"] = &Pow;
- Optimizations["powl"] = &Pow;
- Optimizations["llvm.pow.f32"] = &Pow;
- Optimizations["llvm.pow.f64"] = &Pow;
- Optimizations["llvm.pow.f80"] = &Pow;
- Optimizations["llvm.pow.f128"] = &Pow;
- Optimizations["llvm.pow.ppcf128"] = &Pow;
- Optimizations["exp2l"] = &Exp2;
- Optimizations["exp2"] = &Exp2;
- Optimizations["exp2f"] = &Exp2;
- Optimizations["llvm.exp2.ppcf128"] = &Exp2;
- Optimizations["llvm.exp2.f128"] = &Exp2;
- Optimizations["llvm.exp2.f80"] = &Exp2;
- Optimizations["llvm.exp2.f64"] = &Exp2;
- Optimizations["llvm.exp2.f32"] = &Exp2;
-
- AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
- AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
- AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
- AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
- AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
- AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
- AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
-
- if(UnsafeFPShrink) {
- AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
- AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
- }
-
- // Integer Optimizations
- Optimizations["ffs"] = &FFS;
- Optimizations["ffsl"] = &FFS;
- Optimizations["ffsll"] = &FFS;
- Optimizations["abs"] = &Abs;
- Optimizations["labs"] = &Abs;
- Optimizations["llabs"] = &Abs;
- Optimizations["isdigit"] = &IsDigit;
- Optimizations["isascii"] = &IsAscii;
- Optimizations["toascii"] = &ToAscii;
-
// Formatting and IO Optimizations
Optimizations["sprintf"] = &SPrintF;
- Optimizations["printf"] = &PrintF;
AddOpt(LibFunc::fwrite, &FWrite);
AddOpt(LibFunc::fputs, &FPuts);
Optimizations["fprintf"] = &FPrintF;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index fa2faa2dad..74b2ee10e0 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -47,7 +47,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
+ Constant *StrLen = M->getOrInsertFunction("strlen",
+ AttrListPtr::get(M->getContext(),
+ AWI),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
NULL);
@@ -74,7 +76,9 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
+ Constant *StrNLen = M->getOrInsertFunction("strnlen",
+ AttrListPtr::get(M->getContext(),
+ AWI),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
@@ -102,7 +106,9 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
- Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI),
+ Constant *StrChr = M->getOrInsertFunction("strchr",
+ AttrListPtr::get(M->getContext(),
+ AWI),
I8Ptr, I8Ptr, I32Ty, NULL);
CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
ConstantInt::get(I32Ty, C), "strchr");
@@ -127,7 +133,9 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
+ Value *StrNCmp = M->getOrInsertFunction("strncmp",
+ AttrListPtr::get(M->getContext(),
+ AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -155,7 +163,8 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
+ Value *StrCpy = M->getOrInsertFunction(Name,
+ AttrListPtr::get(M->getContext(), AWI),
I8Ptr, I8Ptr, I8Ptr, NULL);
CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Name);
@@ -178,7 +187,9 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
+ Value *StrNCpy = M->getOrInsertFunction(Name,
+ AttrListPtr::get(M->getContext(),
+ AWI),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), NULL);
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
@@ -203,7 +214,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Attributes::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttrListPtr::get(AWI),
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -231,7 +242,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
+ Value *MemChr = M->getOrInsertFunction("memchr",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt32Ty(),
@@ -261,7 +273,8 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
ArrayRef<Attributes::AttrVal>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
+ Value *MemCmp = M->getOrInsertFunction("memcmp",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -338,7 +351,8 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
Attributes::NoUnwind);
- Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI),
+ Value *PutS = M->getOrInsertFunction("puts",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
NULL);
@@ -362,7 +376,8 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Attributes::NoUnwind);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI),
+ F = M->getOrInsertFunction("fputc",
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt32Ty(), File->getType(),
NULL);
@@ -396,7 +411,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI),
+ F = M->getOrInsertFunction(FPutsName,
+ AttrListPtr::get(M->getContext(), AWI),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
@@ -429,7 +445,8 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI),
+ F = M->getOrInsertFunction(FWriteName,
+ AttrListPtr::get(M->getContext(), AWI),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 009847f87b..303de56d95 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -668,10 +668,29 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (hasLifetimeMarkers(AI))
continue;
- builder.CreateLifetimeStart(AI);
+ // Try to determine the size of the allocation.
+ ConstantInt *AllocaSize = 0;
+ if (ConstantInt *AIArraySize =
+ dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (IFI.TD) {
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+ assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
+ }
+ }
+ }
+
+ builder.CreateLifetimeStart(AI, AllocaSize);
for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
IRBuilder<> builder(Returns[ri]);
- builder.CreateLifetimeEnd(AI);
+ builder.CreateLifetimeEnd(AI, AllocaSize);
}
}
}
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 233bc12d3c..ada8e3b11e 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -37,7 +37,7 @@ namespace {
next = seed;
}
- int rand(void) {
+ int rand() {
next = next * 1103515245 + 12345;
return (unsigned int)(next / 65536) % 32768;
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b33d0d473b..6c34eed13d 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -858,7 +858,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
if (PredHasWeights) {
GetBranchWeights(PTI, Weights);
- // branch-weight metadata is inconsistant here.
+ // branch-weight metadata is inconsistent here.
if (Weights.size() != 1 + PredCases.size())
PredHasWeights = SuccHasWeights = false;
} else if (SuccHasWeights)
@@ -870,7 +870,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SmallVector<uint64_t, 8> SuccWeights;
if (SuccHasWeights) {
GetBranchWeights(TI, SuccWeights);
- // branch-weight metadata is inconsistant here.
+ // branch-weight metadata is inconsistent here.
if (SuccWeights.size() != 1 + BBCases.size())
PredHasWeights = SuccHasWeights = false;
} else if (PredHasWeights)
@@ -967,8 +967,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I =
PTIHandled.begin(),
E = PTIHandled.end(); I != E; ++I) {
- if (PredHasWeights || SuccHasWeights)
- Weights.push_back(WeightsForHandled[*I]);
+ if (PredHasWeights || SuccHasWeights)
+ Weights.push_back(WeightsForHandled[*I]);
PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault));
NewSuccessors.push_back(BBDefault);
}
@@ -1193,7 +1193,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN);
} else {
FirstNonPhiInBBEnd = &*I;
@@ -1202,7 +1202,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
}
if (!FirstNonPhiInBBEnd)
return false;
-
+
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. We scan backward for obviously identical
@@ -1415,7 +1415,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
if (BB1V == BIParentV)
continue;
- // Check for saftey.
+ // Check for safety.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) {
// An unfolded ConstantExpr could end up getting expanded into
// Instructions. Don't speculate this and another instruction at
@@ -3539,7 +3539,8 @@ static bool SwitchToLookupTable(SwitchInst *SI,
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
- if (!TTI || !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
+ if (!TTI || !TTI->getScalarTargetTransformInfo() ||
+ !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables())
return false;
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c3ea63852f..2e494fd1bc 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,6 +20,8 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Function.h"
#include "llvm/IRBuilder.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
@@ -100,6 +102,15 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
return true;
}
+static bool callHasFloatingPointArgument(const CallInst *CI) {
+ for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
+ it != e; ++it) {
+ if ((*it)->getType()->isFloatingPointTy())
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -951,7 +962,14 @@ struct MemCmpOpt : public LibCallOptimization {
// Make sure we're not reading out-of-bounds memory.
if (Len > LHSStr.size() || Len > RHSStr.size())
return 0;
- uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ // Fold the memcmp and normalize the result. This way we get consistent
+ // results across multiple platforms.
+ uint64_t Ret = 0;
+ int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ if (Cmp < 0)
+ Ret = -1;
+ else if (Cmp > 0)
+ Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
@@ -1016,6 +1034,381 @@ struct MemSetOpt : public LibCallOptimization {
}
};
+//===----------------------------------------------------------------------===//
+// Math Library Optimizations
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+ bool CheckRetType;
+ UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+ !FT->getParamType(0)->isDoubleTy())
+ return 0;
+
+ if (CheckRetType) {
+ // Check that all uses of a call like 'sin' are truncated to float.
+ for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+ ++UseI) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ if (Cast == 0 || !Cast->getType()->isFloatTy())
+ return 0;
+ }
+ }
+
+ // If this is something like 'floor((double)floatval)', convert to floorf.
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+ if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+ return 0;
+
+ // floor((double)floatval) -> (double)floorf(floatval)
+ Value *V = Cast->getOperand(0);
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+ return B.CreateFPExt(V, B.getDoubleTy());
+ }
+};
+
+struct UnsafeFPLibCallOptimization : public LibCallOptimization {
+ bool UnsafeFPShrink;
+ UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
+ this->UnsafeFPShrink = UnsafeFPShrink;
+ }
+};
+
+struct CosOpt : public UnsafeFPLibCallOptimization {
+ CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "cos" &&
+ TLI->has(LibFunc::cosf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return Ret;
+ }
+};
+
+struct PowOpt : public UnsafeFPLibCallOptimization {
+ PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "pow" &&
+ TLI->has(LibFunc::powf)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 2 arguments of the same FP type, which match the
+ // result type.
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+ if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ return Op1C;
+ if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ }
+
+ ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
+ if (Op2C == 0) return Ret;
+
+ if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
+ return ConstantFP::get(CI->getType(), 1.0);
+
+ if (Op2C->isExactlyValue(0.5)) {
+ // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+ // This is faster than calling pow, and still handles negative zero
+ // and negative infinity correctly.
+ // TODO: In fast-math mode, this could be just sqrt(x).
+ // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+ Value *Inf = ConstantFP::getInfinity(CI->getType());
+ Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+ Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+ Callee->getAttributes());
+ Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+ Callee->getAttributes());
+ Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
+ Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
+ return Sel;
+ }
+
+ if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x
+ return Op1;
+ if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x
+ return B.CreateFMul(Op1, Op1, "pow2");
+ if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
+ return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
+ Op1, "powrecip");
+ return 0;
+ }
+};
+
+struct Exp2Opt : public UnsafeFPLibCallOptimization {
+ Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *Ret = NULL;
+ if (UnsafeFPShrink && Callee->getName() == "exp2" &&
+ TLI->has(LibFunc::exp2)) {
+ UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+ Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
+ }
+
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return Ret;
+
+ Value *Op = CI->getArgOperand(0);
+ // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if x is at most 32 bits wide
+ // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if x is narrower than 32 bits
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
+
+ if (LdExpArg) {
+ const char *Name;
+ if (Op->getType()->isFloatTy())
+ Name = "ldexpf";
+ else if (Op->getType()->isDoubleTy())
+ Name = "ldexp";
+ else
+ Name = "ldexpl";
+
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
+
+ Module *M = Caller->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Op->getType(),
+ B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
+ return Ret;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Integer Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FFSOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has exactly 1 argument of integer type and that the
+ // result type is i32.
+ if (FT->getNumParams() != 1 ||
+ !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getParamType(0)->isIntegerTy())
+ return 0;
+
+ Value *Op = CI->getArgOperand(0);
+
+ // Constant fold.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
+ }
+
+ // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(Callee->getParent(),
+ Intrinsic::cttz, ArgType);
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
+ V = B.CreateIntCast(V, B.getInt32Ty(), false);
+
+ Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
+ }
+};
+
+struct AbsOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(integer) where the types agree.
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ FT->getParamType(0) != FT->getReturnType())
+ return 0;
+
+ // abs(x) -> x >s -1 ? x : -x
+ Value *Op = CI->getArgOperand(0);
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
+ "ispos");
+ Value *Neg = B.CreateNeg(Op, "neg");
+ return B.CreateSelect(Pos, Op, Neg);
+ }
+};
+
+struct IsDigitOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isdigit(c) -> (c-'0') <u 10
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+struct IsAsciiOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require integer(i32)
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // isascii(c) -> c <u 128
+ Value *Op = CI->getArgOperand(0);
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
+ return B.CreateZExt(Op, CI->getType());
+ }
+};
+
+struct ToAsciiOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // We require i32(i32)
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isIntegerTy(32))
+ return 0;
+
+ // toascii(c) -> c & 0x7f
+ return B.CreateAnd(CI->getArgOperand(0),
+ ConstantInt::get(CI->getType(),0x7F));
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Formatting and IO Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct PrintFOpt : public LibCallOptimization {
+ Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) {
+ // Check for a fixed format string.
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ return 0;
+
+ // Empty format string -> noop.
+ if (FormatStr.empty()) // Tolerate printf's declared void.
+ return CI->use_empty() ? (Value*)CI :
+ ConstantInt::get(CI->getType(), 0);
+
+ // Do not do any of the following transformations if the printf return value
+ // is used; in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
+ if (FormatStr.size() == 1) {
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("foo\n") --> puts("foo")
+ if (FormatStr[FormatStr.size()-1] == '\n' &&
+ FormatStr.find('%') == std::string::npos) { // no format characters.
+ // Create a string literal with no \n on it. We expect the constant merge
+ // pass to be run after this pass, to merge duplicate strings.
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ }
+
+ // Optimize specific format strings.
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+
+ if (CI->use_empty() || !Res) return Res;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ // printf("%s\n", str) --> puts(str)
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+ }
+ return 0;
+ }
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+ return V;
+ }
+
+ // printf(format, ...) -> iprintf(format, ...) if no floating point
+ // arguments.
+ if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Constant *IPrintFFn =
+ M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(IPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+ return 0;
+ }
+};
+
} // End anonymous namespace.
namespace llvm {
@@ -1024,6 +1417,7 @@ class LibCallSimplifierImpl {
const DataLayout *TD;
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
+ bool UnsafeFPShrink;
StringMap<LibCallOptimization*> Optimizations;
// Fortified library call optimizations.
@@ -1057,14 +1451,33 @@ class LibCallSimplifierImpl {
MemMoveOpt MemMove;
MemSetOpt MemSet;
+ // Math library call optimizations.
+ UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
+ CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
+
+ // Integer library call optimizations.
+ FFSOpt FFS;
+ AbsOpt Abs;
+ IsDigitOpt IsDigit;
+ IsAsciiOpt IsAscii;
+ ToAsciiOpt ToAscii;
+
+ // Formatting and IO library call optimizations.
+ PrintFOpt PrintF;
+
void initOptimizations();
void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
+ void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
public:
LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
- const LibCallSimplifier *LCS) {
+ const LibCallSimplifier *LCS,
+ bool UnsafeFPShrink = false)
+ : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true),
+ Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
this->TD = TD;
this->TLI = TLI;
this->LCS = LCS;
+ this->UnsafeFPShrink = UnsafeFPShrink;
}
Value *optimizeCall(CallInst *CI);
@@ -1108,6 +1521,73 @@ void LibCallSimplifierImpl::initOptimizations() {
addOpt(LibFunc::memcpy, &MemCpy);
addOpt(LibFunc::memmove, &MemMove);
addOpt(LibFunc::memset, &MemSet);
+
+ // Math library call optimizations.
+ addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
+ addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
+ addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
+ addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
+ addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
+ addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
+ addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
+
+ if(UnsafeFPShrink) {
+ addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
+ addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+ }
+
+ addOpt(LibFunc::cosf, &Cos);
+ addOpt(LibFunc::cos, &Cos);
+ addOpt(LibFunc::cosl, &Cos);
+ addOpt(LibFunc::powf, &Pow);
+ addOpt(LibFunc::pow, &Pow);
+ addOpt(LibFunc::powl, &Pow);
+ Optimizations["llvm.pow.f32"] = &Pow;
+ Optimizations["llvm.pow.f64"] = &Pow;
+ Optimizations["llvm.pow.f80"] = &Pow;
+ Optimizations["llvm.pow.f128"] = &Pow;
+ Optimizations["llvm.pow.ppcf128"] = &Pow;
+ addOpt(LibFunc::exp2l, &Exp2);
+ addOpt(LibFunc::exp2, &Exp2);
+ addOpt(LibFunc::exp2f, &Exp2);
+ Optimizations["llvm.exp2.ppcf128"] = &Exp2;
+ Optimizations["llvm.exp2.f128"] = &Exp2;
+ Optimizations["llvm.exp2.f80"] = &Exp2;
+ Optimizations["llvm.exp2.f64"] = &Exp2;
+ Optimizations["llvm.exp2.f32"] = &Exp2;
+
+ // Integer library call optimizations.
+ addOpt(LibFunc::ffs, &FFS);
+ addOpt(LibFunc::ffsl, &FFS);
+ addOpt(LibFunc::ffsll, &FFS);
+ addOpt(LibFunc::abs, &Abs);
+ addOpt(LibFunc::labs, &Abs);
+ addOpt(LibFunc::llabs, &Abs);
+ addOpt(LibFunc::isdigit, &IsDigit);
+ addOpt(LibFunc::isascii, &IsAscii);
+ addOpt(LibFunc::toascii, &ToAscii);
+
+ // Formatting and IO library call optimizations.
+ addOpt(LibFunc::printf, &PrintF);
}
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
@@ -1128,9 +1608,16 @@ void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
Optimizations[TLI->getName(F)] = Opt;
}
+void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2,
+ LibCallOptimization* Opt) {
+ if (TLI->has(F1) && TLI->has(F2))
+ Optimizations[TLI->getName(F1)] = Opt;
+}
+
LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
- Impl = new LibCallSimplifierImpl(TD, TLI, this);
+ const TargetLibraryInfo *TLI,
+ bool UnsafeFPShrink) {
+ Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink);
}
LibCallSimplifier::~LibCallSimplifier() {
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 4653a7d7c8..dacbc7f242 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -28,6 +28,7 @@
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -483,6 +484,10 @@ namespace {
if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
T2 = SI->getCondition()->getType();
+ } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
+ T2 = SI->getOperand(0)->getType();
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ T2 = CI->getOperand(0)->getType();
}
}
@@ -671,6 +676,19 @@ namespace {
return false;
}
+
+ bool isPureIEChain(InsertElementInst *IE) {
+ InsertElementInst *IENext = IE;
+ do {
+ if (!isa<UndefValue>(IENext->getOperand(0)) &&
+ !isa<InsertElementInst>(IENext->getOperand(0))) {
+ return false;
+ }
+ } while ((IENext =
+ dyn_cast<InsertElementInst>(IENext->getOperand(0))));
+
+ return true;
+ }
};
// This function implements one vectorization iteration on the provided
@@ -987,10 +1005,11 @@ namespace {
// We don't want to fuse to a type that will be split, even
// if the two input types will also be split and there is no other
// associated cost.
- unsigned VParts = VTTI->getNumberOfParts(VT1);
- if (VParts > 1)
+ unsigned VParts1 = VTTI->getNumberOfParts(VT1),
+ VParts2 = VTTI->getNumberOfParts(VT2);
+ if (VParts1 > 1 || VParts2 > 1)
return false;
- else if (!VParts && VCost == ICost + JCost)
+ else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
return false;
CostSavings = ICost + JCost - VCost;
@@ -1466,7 +1485,7 @@ namespace {
PrunedTree.insert(QTop.first);
// Visit each child, pruning as necessary...
- DenseMap<ValuePair, size_t> BestChildren;
+ SmallVector<ValuePairWithDepth, 8> BestChildren;
VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
K != QTopRange.second; ++K) {
@@ -1498,7 +1517,7 @@ namespace {
DenseSet<ValuePair> CurrentPairs;
bool CanAdd = true;
- for (DenseMap<ValuePair, size_t>::iterator C2
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C2
= BestChildren.begin(), E2 = BestChildren.end();
C2 != E2; ++C2) {
if (C2->first.first == C->first.first ||
@@ -1583,22 +1602,22 @@ namespace {
// to an already-selected child. Check for this here, and if a
// conflict is found, then remove the previously-selected child
// before adding this one in its place.
- for (DenseMap<ValuePair, size_t>::iterator C2
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C2
= BestChildren.begin(); C2 != BestChildren.end();) {
if (C2->first.first == C->first.first ||
C2->first.first == C->first.second ||
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers))
- BestChildren.erase(C2++);
+ C2 = BestChildren.erase(C2);
else
++C2;
}
- BestChildren.insert(ValuePairWithDepth(C->first, C->second));
+ BestChildren.push_back(ValuePairWithDepth(C->first, C->second));
}
- for (DenseMap<ValuePair, size_t>::iterator C
+ for (SmallVector<ValuePairWithDepth, 8>::iterator C
= BestChildren.begin(), E2 = BestChildren.end();
C != E2; ++C) {
size_t DepthF = getDepthFactor(C->first.first);
@@ -1683,10 +1702,20 @@ namespace {
// The set of pairs that have already contributed to the total cost.
DenseSet<ValuePair> IncomingPairs;
+ // If the cost model were perfect, this might not be necessary; but we
+ // need to make sure that we don't get stuck vectorizing our own
+ // shuffle chains.
+ bool HasNontrivialInsts = false;
+
// The node weights represent the cost savings associated with
// fusing the pair of instructions.
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
E = PrunedTree.end(); S != E; ++S) {
+ if (!isa<ShuffleVectorInst>(S->first) &&
+ !isa<InsertElementInst>(S->first) &&
+ !isa<ExtractElementInst>(S->first))
+ HasNontrivialInsts = true;
+
bool FlipOrder = false;
if (getDepthFactor(S->first)) {
@@ -1760,9 +1789,12 @@ namespace {
bool NeedsExtraction = false;
for (Value::use_iterator I = S->first->use_begin(),
IE = S->first->use_end(); I != IE; ++I) {
- if (isa<ShuffleVectorInst>(*I) ||
- isa<InsertElementInst>(*I) ||
- isa<ExtractElementInst>(*I))
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
continue;
if (PrunedTreeInstrs.count(*I))
continue;
@@ -1787,9 +1819,12 @@ namespace {
NeedsExtraction = false;
for (Value::use_iterator I = S->second->use_begin(),
IE = S->second->use_end(); I != IE; ++I) {
- if (isa<ShuffleVectorInst>(*I) ||
- isa<InsertElementInst>(*I) ||
- isa<ExtractElementInst>(*I))
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
continue;
if (PrunedTreeInstrs.count(*I))
continue;
@@ -1839,14 +1874,37 @@ namespace {
// Combining vector operations of the same type is also assumed
// folded with other operations.
- if (Ty1 == Ty2 &&
- (isa<ShuffleVectorInst>(O1) ||
- isa<InsertElementInst>(O1) ||
- isa<InsertElementInst>(O1)) &&
- (isa<ShuffleVectorInst>(O2) ||
- isa<InsertElementInst>(O2) ||
- isa<InsertElementInst>(O2)))
- continue;
+ if (Ty1 == Ty2) {
+ // If both are insert elements, then both can be widened.
+ InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
+ *IEO2 = dyn_cast<InsertElementInst>(O2);
+ if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
+ continue;
+ // If both are extract elements, and both have the same input
+ // type, then they can be replaced with a shuffle
+ ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
+ *EIO2 = dyn_cast<ExtractElementInst>(O2);
+ if (EIO1 && EIO2 &&
+ EIO1->getOperand(0)->getType() ==
+ EIO2->getOperand(0)->getType())
+ continue;
+ // If both are a shuffle with equal operand types and only two
+ // unique operands, then they can be replaced with a single
+ // shuffle
+ ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
+ *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
+ if (SIO1 && SIO2 &&
+ SIO1->getOperand(0)->getType() ==
+ SIO2->getOperand(0)->getType()) {
+ SmallSet<Value *, 4> SIOps;
+ SIOps.insert(SIO1->getOperand(0));
+ SIOps.insert(SIO1->getOperand(1));
+ SIOps.insert(SIO2->getOperand(0));
+ SIOps.insert(SIO2->getOperand(1));
+ if (SIOps.size() <= 2)
+ continue;
+ }
+ }
int ESContrib;
// This pair has already been formed.
@@ -1894,6 +1952,13 @@ namespace {
}
}
}
+
+ if (!HasNontrivialInsts) {
+ DEBUG(if (DebugPairSelection) dbgs() <<
+ "\tNo non-trivial instructions in tree;"
+ " override to zero effective size\n");
+ EffSize = 0;
+ }
} else {
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
E = PrunedTree.end(); S != E; ++S)
@@ -2092,18 +2157,7 @@ namespace {
if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
// If we have a pure insertelement chain, then this can be rewritten
// into a chain that directly builds the larger type.
- bool PureChain = true;
- InsertElementInst *LIENext = LIE;
- do {
- if (!isa<UndefValue>(LIENext->getOperand(0)) &&
- !isa<InsertElementInst>(LIENext->getOperand(0))) {
- PureChain = false;
- break;
- }
- } while ((LIENext =
- dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
-
- if (PureChain) {
+ if (isPureIEChain(LIE)) {
SmallVector<Value *, 8> VectElemts(numElemL,
UndefValue::get(ArgTypeL->getScalarType()));
InsertElementInst *LIENext = LIE;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a7ef248e6e..55733f7f8a 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -25,6 +25,7 @@
// 4. LoopVectorizationCostModel - A unit that checks for the profitability
// of vectorization. It decides on the optimal vector width, which
// can be one, if vectorization is not profitable.
+//
//===----------------------------------------------------------------------===//
//
// The reduction-variable vectorization is based on the paper:
@@ -36,6 +37,9 @@
// Other ideas/concepts are from:
// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
//
+// S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of
+// Vectorizing Compilers.
+//
//===----------------------------------------------------------------------===//
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
@@ -82,6 +86,9 @@ const unsigned TinyTripCountThreshold = 16;
/// number of pointers. Notice that the check is quadratic!
const unsigned RuntimeMemoryCheckThreshold = 2;
+/// This is the highest vector width that we try to generate.
+const unsigned MaxVectorSize = 8;
+
namespace {
// Forward declarations.
@@ -106,23 +113,28 @@ class SingleBlockLoopVectorizer {
public:
/// Ctor.
SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
- DominatorTree *dt, LPPassManager *Lpm,
+ DominatorTree *Dt, DataLayout *Dl,
+ LPPassManager *Lpm,
unsigned VecWidth):
- OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth),
+ OrigLoop(Orig), SE(Se), LI(Li), DT(Dt), DL(Dl), LPM(Lpm), VF(VecWidth),
Builder(Se->getContext()), Induction(0), OldInduction(0) { }
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *Legal) {
- ///Create a new empty loop. Unlink the old loop and connect the new one.
+ // Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop(Legal);
- /// Widen each instruction in the old loop to a new one in the new loop.
- /// Use the Legality module to find the induction and reduction variables.
+ // Widen each instruction in the old loop to a new one in the new loop.
+ // Use the Legality module to find the induction and reduction variables.
vectorizeLoop(Legal);
// Register the new loop and update the analysis passes.
updateAnalysis();
}
private:
+ /// Add code that checks at runtime if the accessed arrays overlap.
+ /// Returns the comparator value or NULL if no check is needed.
+ Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc);
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
@@ -167,6 +179,8 @@ private:
LoopInfo *LI;
// Dominator Tree.
DominatorTree *DT;
+ // Data Layout.
+ DataLayout *DL;
// Loop Pass Manager;
LPPassManager *LPM;
// The vectorization factor to use.
@@ -250,16 +264,46 @@ public:
// This POD struct holds information about the memory runtime legality
// check that a group of pointers do not overlap.
struct RuntimePointerCheck {
+ RuntimePointerCheck(): Need(false) {}
+
+ /// Reset the state of the pointer runtime information.
+ void reset() {
+ Need = false;
+ Pointers.clear();
+ Starts.clear();
+ Ends.clear();
+ }
+
+ /// Insert a pointer and calculate the start and end SCEVs.
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr) {
+ const SCEV *Sc = SE->getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader());
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+ }
+
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
SmallVector<Value*, 2> Pointers;
+ /// Holds the pointer value at the beginning of the loop.
+ SmallVector<const SCEV*, 2> Starts;
+ /// Holds the pointer value at the end of the loop.
+ SmallVector<const SCEV*, 2> Ends;
};
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+ /// InductionList saves induction variables and maps them to the initial
+ /// value entering the loop.
+ typedef DenseMap<PHINode*, Value*> InductionList;
+
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
@@ -271,11 +315,14 @@ public:
/// Returns the reduction variables found in the loop.
ReductionList *getReductionVars() { return &Reductions; }
- /// Check if the pointer returned by this GEP is consecutive
- /// when the index is vectorized. This happens when the last
- /// index of the GEP is consecutive, like the induction variable.
+ /// Returns the induction variables found in the loop.
+ InductionList *getInductionVars() { return &Inductions; }
+
+ /// Check if this pointer is consecutive when vectorizing. This happens
+ /// when the last index of the GEP is the induction variable, or that the
+ /// pointer itself is an induction variable.
/// This check allows us to vectorize A[idx] into a wide load/store.
- bool isConsecutiveGep(Value *Ptr);
+ bool isConsecutivePtr(Value *Ptr);
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
@@ -317,10 +364,16 @@ private:
// --- vectorization state --- //
- /// Holds the induction variable.
+ /// Holds the integer induction variable. This is the counter of the
+ /// loop.
PHINode *Induction;
/// Holds the reduction variables.
ReductionList Reductions;
+ /// Holds all of the induction variables that we found in the loop.
+ /// Notice that inductions don't need to start at zero and that induction
+ /// variables can be pointers.
+ InductionList Inductions;
+
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value*, 4> AllowedExit;
@@ -350,7 +403,7 @@ public:
/// Returns the most profitable vectorization factor for the loop that is
/// smaller or equal to the VF argument. This method checks every power
/// of two up to VF.
- unsigned findBestVectorizationFactor(unsigned VF = 8);
+ unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
private:
/// Returns the expected execution cost. The unit of the cost does
@@ -438,7 +491,7 @@ struct LoopVectorize : public LoopPass {
"\n");
// If we decided that it is *legal* to vectorizer the loop then do it.
- SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF);
+ SingleBlockLoopVectorizer LB(L, SE, LI, DT, DL, &LPM, VF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -459,10 +512,6 @@ struct LoopVectorize : public LoopPass {
};
Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
- // Instructions that access the old induction variable
- // actually want to get the new one.
- if (V == OldInduction)
- V = Induction;
// Create the types.
LLVMContext &C = V->getContext();
Type *VTy = VectorType::get(V->getType(), VF);
@@ -502,7 +551,14 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
return Builder.CreateAdd(Val, Cv, "induction");
}
-bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
+bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
+ assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
+
+ // If this pointer is an induction variable, then it is consecutive.
+ PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
+ if (Phi && getInductionVars()->count(Phi))
+ return true;
+
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
if (!Gep)
return false;
@@ -549,13 +605,7 @@ Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
Constant*
SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
- SmallVector<Constant*, 8> Indices;
- // Create a vector of consecutive numbers from zero to VF.
- for (unsigned i = 0; i < VF; ++i)
- Indices.push_back(ConstantInt::get(ScalarTy, Val, true));
-
- // Add the consecutive indices to the vector value.
- return ConstantVector::get(Indices);
+ return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true));
}
void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
@@ -569,7 +619,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// If we are accessing the old induction variable, use the new one.
if (SrcOp == OldInduction) {
- Params.push_back(getBroadcastInstrs(Induction));
+ Params.push_back(getVectorValue(Induction));
continue;
}
@@ -628,6 +678,67 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
WidenMap[Instr] = VecResults;
}
+Value*
+SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc) {
+ LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
+ Legal->getRuntimePointerCheck();
+
+ if (!PtrRtCheck->Need)
+ return NULL;
+
+ Value *MemoryRuntimeCheck = 0;
+ unsigned NumPointers = PtrRtCheck->Pointers.size();
+ SmallVector<Value* , 2> Starts;
+ SmallVector<Value* , 2> Ends;
+
+ SCEVExpander Exp(*SE, "induction");
+
+ // Use this type for pointer arithmetic.
+ Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
+
+ for (unsigned i=0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck->Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, OrigLoop)) {
+ DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i],
+ PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+ Starts[i], Ends[j], "bound0", Loc);
+ Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
+ Starts[j], Ends[i], "bound1", Loc);
+ Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
+ "found.conflict", Loc);
+ if (MemoryRuntimeCheck)
+ MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
+ MemoryRuntimeCheck,
+ IsConflict,
+ "conflict.rdx", Loc);
+ else
+ MemoryRuntimeCheck = IsConflict;
+
+ }
+ }
+
+ return MemoryRuntimeCheck;
+}
+
void
SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/*
@@ -659,9 +770,18 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
...
*/
+ BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+ BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
+ BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(ExitBlock && "Must have an exit block");
+
+ // Some loops have a single integer induction variable, while other loops
+ // don't. One example is c++ iterators that often have multiple pointer
+ // induction variables. In the code below we also support a case where we
+ // don't have a single induction variable.
OldInduction = Legal->getInduction();
- assert(OldInduction && "We must have a single phi node.");
- Type *IdxTy = OldInduction->getType();
+ Type *IdxTy = OldInduction ? OldInduction->getType() :
+ DL->getIntPtrType(SE->getContext());
// Find the loop boundaries.
const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
@@ -670,35 +790,42 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(ExitCount->getType(), 1));
- // We may need to extend the index in case there is a type mismatch.
- // We know that the count starts at zero and does not overflow.
- // We are using Zext because it should be less expensive.
- if (ExitCount->getType() != IdxTy)
- ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
- // This is the original scalar-loop preheader.
- BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
- BasicBlock *ExitBlock = OrigLoop->getExitBlock();
- assert(ExitBlock && "Must have an exit block");
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, "induction");
- // The loop index does not have to start at Zero. It starts with this value.
- Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock);
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ BypassBlock->getTerminator());
+
+ // The loop index does not have to start at Zero. Find the original start
+ // value from the induction PHI node. If we don't have an induction variable
+ // then we know that it starts at zero.
+ Value *StartIdx = OldInduction ?
+ OldInduction->getIncomingValueForBlock(BypassBlock):
+ ConstantInt::get(IdxTy, 0);
assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
assert(BypassBlock && "Invalid loop structure");
+ // Generate the code that checks at runtime if arrays overlap.
+ Value *MemoryRuntimeCheck = addRuntimeCheck(Legal,
+ BypassBlock->getTerminator());
+
+ // Split the single block loop into the two loop structure described above.
BasicBlock *VectorPH =
BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
- BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(),
- "vector.body");
-
- BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(),
- "middle.block");
+ BasicBlock *VecBody =
+ VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
+ BasicBlock *MiddleBlock =
+ VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
BasicBlock *ScalarPH =
- MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(),
- "scalar.preheader");
- // Find the induction variable.
- BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+
+ // This is the location in which we add all of the logic for bypassing
+ // the new vector loop.
+ Instruction *Loc = BypassBlock->getTerminator();
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
@@ -708,13 +835,16 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
Induction = Builder.CreatePHI(IdxTy, 2, "index");
Constant *Step = ConstantInt::get(IdxTy, VF);
- // Expand the trip count and place the new instructions in the preheader.
- // Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, "induction");
- Instruction *Loc = BypassBlock->getTerminator();
-
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+ // We may need to extend the index in case there is a type mismatch.
+ // We know that the count starts at zero and does not overflow.
+ if (Count->getType() != IdxTy) {
+ // The exit count can be of pointer type. Convert it to the correct
+ // integer type.
+ if (ExitCount->getType()->isPointerTy())
+ Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc);
+ else
+ Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);
+ }
// Add the start index to the loop count to get the new end index.
Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
@@ -727,84 +857,79 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
"end.idx.rnd.down", Loc);
- // Now, compare the new count to zero. If it is zero, jump to the scalar part.
+ // Now, compare the new count to zero. If it is zero skip the vector loop and
+ // jump to the scalar loop.
Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
IdxEndRoundDown,
StartIdx,
"cmp.zero", Loc);
- LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
- Legal->getRuntimePointerCheck();
- Value *MemoryRuntimeCheck = 0;
- if (PtrRtCheck->Need) {
- unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<Value* , 2> Starts;
- SmallVector<Value* , 2> Ends;
-
- // Use this type for pointer arithmetic.
- Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
-
- for (unsigned i=0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck->Pointers[i];
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, OrigLoop)) {
- DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
- Value *Start = Exp.expandCodeFor(AR->getStart(), PtrArithTy, Loc);
- const SCEV *Ex = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
- const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- assert(!isa<SCEVCouldNotCompute>(ScEnd) && "Invalid scev range.");
- Value *End = Exp.expandCodeFor(ScEnd, PtrArithTy, Loc);
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
-
- for (unsigned i=0; i < NumPointers; ++i) {
- for (unsigned j=i+1; j < NumPointers; ++j) {
- Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
- Starts[0], Ends[1], "bound0", Loc);
- Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
- Starts[1], Ends[0], "bound1", Loc);
- Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
- "found.conflict", Loc);
- if (MemoryRuntimeCheck) {
- MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
- MemoryRuntimeCheck,
- IsConflict,
- "conflict.rdx", Loc);
- } else {
- MemoryRuntimeCheck = IsConflict;
- }
- }
- }
- }// end of need-runtime-check code.
-
// If we are using memory runtime checks, include them in.
- if (MemoryRuntimeCheck) {
+ if (MemoryRuntimeCheck)
Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck,
"CntOrMem", Loc);
- }
BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
// Remove the old terminator.
Loc->eraseFromParent();
// We are going to resume the execution of the scalar loop.
- // This PHI decides on what number to start. If we come from the
- // vector loop then we need to start with the end index minus the
- // index modulo VF. If we come from a bypass edge then we need to start
- // from the real start.
- PHINode* ResumeIndex = PHINode::Create(IdxTy, 2, "resume.idx",
- MiddleBlock->getTerminator());
- ResumeIndex->addIncoming(StartIdx, BypassBlock);
- ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+ // Go over all of the induction variables that we found and fix the
+ // PHIs that are left in the scalar version of the loop.
+ // The starting values of PHI nodes depend on the counter of the last
+ // iteration in the vectorized loop.
+ // If we come from a bypass edge then we need to start from the original start
+ // value.
+
+ // This variable saves the new starting index for the scalar loop.
+ PHINode *ResumeIndex = 0;
+ LoopVectorizationLegality::InductionList::iterator I, E;
+ LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
+ for (I = List->begin(), E = List->end(); I != E; ++I) {
+ PHINode *OrigPhi = I->first;
+ PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+ MiddleBlock->getTerminator());
+ Value *EndValue = 0;
+ if (OrigPhi->getType()->isIntegerTy()) {
+ // Handle the integer induction counter:
+ assert(OrigPhi == OldInduction && "Unknown integer PHI");
+ // We know what the end value is.
+ EndValue = IdxEndRoundDown;
+ // We also know which PHI node holds it.
+ ResumeIndex = ResumeVal;
+ } else {
+ // For pointer induction variables, calculate the offset using
+ // the end index.
+ EndValue = GetElementPtrInst::Create(I->second, CountRoundDown,
+ "ptr.ind.end",
+ BypassBlock->getTerminator());
+ }
+
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ ResumeVal->addIncoming(I->second, BypassBlock);
+ ResumeVal->addIncoming(EndValue, VecBody);
+
+ // Fix the scalar body counter (PHI node).
+ unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+ }
+
+ // If we are generating a new induction variable then we also need to
+ // generate the code that calculates the exit value. This value is not
+ // simply the end of the counter because we may skip the vectorized body
+ // in case of a runtime check.
+ if (!OldInduction){
+ assert(!ResumeIndex && "Unexpected resume value found");
+ ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
+ MiddleBlock->getTerminator());
+ ResumeIndex->addIncoming(StartIdx, BypassBlock);
+ ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
+ }
+
+ // Make sure that we found the index where scalar loop needs to continue.
+ assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() &&
+ "Invalid resume Index");
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
@@ -828,10 +953,6 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Now we have two terminators. Remove the old one from the block.
VecBody->getTerminator()->eraseFromParent();
- // Fix the scalar body iteration count.
- unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
- OldInduction->setIncomingValue(BlockIdx, ResumeIndex);
-
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
@@ -901,7 +1022,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// add the new incoming edges to the PHI. At this point all of the
// instructions in the basic block are vectorized, so we can use them to
// construct the PHI.
- PhiVector PHIsToFix;
+ PhiVector RdxPHIsToFix;
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
@@ -914,15 +1035,53 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
continue;
case Instruction::PHI:{
PHINode* P = cast<PHINode>(Inst);
- // Special handling for the induction var.
- if (OldInduction == Inst)
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = VectorType::get(Inst->getType(), VF);
+ WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody->getFirstInsertionPt());
+ RdxPHIsToFix.push_back(P);
+ continue;
+ }
+
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
+
+ if (P->getType()->isIntegerTy()) {
+ assert(P == OldInduction && "Unexpected PHI");
+ WidenMap[Inst] = getBroadcastInstrs(Induction);
continue;
- // This is phase one of vectorizing PHIs.
- // This has to be a reduction variable.
- assert(Legal->getReductionVars()->count(P) && "Not a Reduction");
- Type *VecTy = VectorType::get(Inst->getType(), VF);
- WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
- PHIsToFix.push_back(P);
+ }
+
+ // Handle pointer inductions.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+ Value *StartIdx = OldInduction ?
+ Legal->getInductionVars()->lookup(OldInduction) :
+ ConstantInt::get(Induction->getType(), 0);
+
+ // This is the pointer value coming into the loop.
+ Value *StartPtr = Legal->getInductionVars()->lookup(P);
+
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // This is the vector of results. Notice that we don't generate vector
+ // geps because scalar geps result in better code.
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ Constant *Idx = ConstantInt::get(Induction->getType(), i);
+ Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
+ }
+
+ WidenMap[Inst] = VecVal;
continue;
}
case Instruction::Add:
@@ -1010,21 +1169,27 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
// This store does not use GEPs.
- if (!Legal->isConsecutiveGep(Gep)) {
+ if (!Legal->isConsecutivePtr(Ptr)) {
scalarizeInstruction(Inst);
break;
}
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
+ if (Gep) {
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ } else {
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+ }
Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
Value *Val = getVectorValue(SI->getValueOperand());
Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
@@ -1038,23 +1203,31 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
unsigned Alignment = LI->getAlignment();
GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- // If we don't have a gep, or that the pointer is loop invariant,
+ // If the pointer is loop invariant or if it is non consecutive,
// scalarize the load.
- if (!Gep || Legal->isUniform(Gep) || !Legal->isConsecutiveGep(Gep)) {
+ bool Con = Legal->isConsecutivePtr(Ptr);
+ if (Legal->isUniform(Ptr) || !Con) {
scalarizeInstruction(Inst);
break;
}
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+ if (Gep) {
+ // The last index does not have to be the induction. It can be
+ // consecutive and be a function of the index. For example A[I+1];
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ } else {
+ // Use the induction element ptr.
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+ Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+ }
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
LI = Builder.CreateLoad(Ptr);
LI->setAlignment(Alignment);
@@ -1098,7 +1271,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Create the 'reduced' values for each of the induction vars.
// The reduced values are the vector values that we scalarize and combine
// after the loop is finished.
- for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
+ for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
it != e; ++it) {
PHINode *RdxPhi = *it;
PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
@@ -1130,7 +1303,6 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *VectorStart = Builder.CreateInsertElement(Identity,
RdxDesc.StartValue, Zero);
-
// Fix the vector-loop phi.
// We created the induction variable so we know that the
// preheader is the first entry.
@@ -1236,7 +1408,7 @@ bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getLoopPreheader()) {
assert(false && "No preheader!!");
DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
- return false;
+ return false;
}
// We can only vectorize single basic block loops.
@@ -1282,23 +1454,34 @@ bool LoopVectorizationLegality::canVectorize() {
}
bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+
+ BasicBlock *PreHeader = TheLoop->getLoopPreheader();
+
// Scan the instructions in the block and look for hazards.
for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
Instruction *I = it;
- PHINode *Phi = dyn_cast<PHINode>(I);
- if (Phi) {
+ if (PHINode *Phi = dyn_cast<PHINode>(I)) {
// This should not happen because the loop should be normalized.
if (Phi->getNumIncomingValues() != 2) {
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
- // We only look at integer phi nodes.
- if (!Phi->getType()->isIntegerTy()) {
- DEBUG(dbgs() << "LV: Found an non-int PHI.\n");
+
+ // This is the value coming from the preheader.
+ Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+
+ // We only look at integer and pointer phi nodes.
+ if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
+ DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
+ Inductions[Phi] = StartValue;
+ continue;
+ } else if (!Phi->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
+ // Handle integer PHIs:
if (isInductionVariable(Phi)) {
if (Induction) {
DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
@@ -1306,6 +1489,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
}
DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
Induction = Phi;
+ Inductions[Phi] = StartValue;
continue;
}
if (AddReductionVar(Phi, IntegerAdd)) {
@@ -1364,8 +1548,8 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
} // next instr.
if (!Induction) {
- DEBUG(dbgs() << "LV: Did not find an induction var.\n");
- return false;
+ DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+ assert(getInductionVars()->size() && "No induction variables");
}
// Don't vectorize if the memory dependencies do not allow vectorization.
@@ -1382,15 +1566,10 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
while (Worklist.size()) {
Instruction *I = dyn_cast<Instruction>(Worklist.back());
Worklist.pop_back();
- // Look at instructions inside this block.
- if (!I) continue;
- if (I->getParent() != &BB) continue;
- // Stop when reaching PHI nodes.
- if (isa<PHINode>(I)) {
- assert(I == Induction && "Found a uniform PHI that is not the induction");
- break;
- }
+ // Look at instructions inside this block. Stop when reaching PHI nodes.
+ if (!I || I->getParent() != &BB || isa<PHINode>(I))
+ continue;
// This is a known uniform.
Uniforms.insert(I);
@@ -1493,7 +1672,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// If the address of i is unknown (for example A[B[i]]) then we may
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
- if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr))
+ if (Seen.insert(Ptr) || !isConsecutivePtr(Ptr))
Reads.push_back(Ptr);
}
@@ -1509,7 +1688,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
bool RT = true;
for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
if (hasComputableBounds(*I)) {
- PtrRtCheck.Pointers.push_back(*I);
+ PtrRtCheck.insert(SE, TheLoop, *I);
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
} else {
RT = false;
@@ -1517,7 +1696,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
}
for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
if (hasComputableBounds(*I)) {
- PtrRtCheck.Pointers.push_back(*I);
+ PtrRtCheck.insert(SE, TheLoop, *I);
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
} else {
RT = false;
@@ -1527,7 +1706,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// Check that we did not collect too many pointers or found a
// unsizeable pointer.
if (!RT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
- PtrRtCheck.Pointers.clear();
+ PtrRtCheck.reset();
RT = false;
}
@@ -1582,8 +1761,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
// It is safe to vectorize and we don't need any runtime checks.
DEBUG(dbgs() << "LV: We don't need a runtime memory check.\n");
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
+ PtrRtCheck.reset();
return true;
}
@@ -1677,8 +1855,6 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
case Instruction::Sub:
return Kind == IntegerAdd;
case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::SDiv:
return Kind == IntegerMult;
case Instruction::And:
return Kind == IntegerAnd;
@@ -1690,6 +1866,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
}
bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+ Type *PhiTy = Phi->getType();
+ // We only handle integer and pointer inductions variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
// Check that the PHI is consecutive and starts at zero.
const SCEV *PhiScev = SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
@@ -1699,11 +1880,17 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
}
const SCEV *Step = AR->getStepRecurrence(*SE);
- if (!Step->isOne()) {
- DEBUG(dbgs() << "LV: PHI stride does not equal one.\n");
- return false;
- }
- return true;
+ // Integer inductions need to have a stride of one.
+ if (PhiTy->isIntegerTy())
+ return Step->isOne();
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) return false;
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
+ return (C->getValue()->equalsInt(Size));
}
bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
@@ -1832,7 +2019,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
SI->getAlignment(), SI->getPointerAddressSpace());
// Scalarized stores.
- if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
+ if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
unsigned Cost = 0;
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
ValTy);
@@ -1859,7 +2046,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
LI->getPointerAddressSpace());
// Scalarized loads.
- if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
+ if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
unsigned Cost = 0;
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
// The cost of inserting the loaded value into the result vector.
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index b72c17f667..c45a04f12b 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -84,7 +84,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out)
default: Out << "cc" << cc; break;
}
}
-
+
// PrintEscapedString - Print each character of the specified string, escaping
// it if it is not printable or if it is an escape char.
static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
@@ -878,7 +878,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << ']';
return;
}
-
+
if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
// As a special case, print the array as a string if it is an array of
// i8 with ConstantInt values.
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 5a552c34e1..55722bcf75 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -281,14 +281,14 @@ bool AttrBuilder::hasAlignmentAttr() const {
uint64_t AttrBuilder::getAlignment() const {
if (!hasAlignmentAttr())
return 0;
- return 1U <<
+ return 1ULL <<
(((Bits & AttributesImpl::getAttrMask(Attributes::Alignment)) >> 16) - 1);
}
uint64_t AttrBuilder::getStackAlignment() const {
if (!hasAlignmentAttr())
return 0;
- return 1U <<
+ return 1ULL <<
(((Bits & AttributesImpl::getAttrMask(Attributes::StackAlignment))>>26)-1);
}
@@ -355,62 +355,8 @@ uint64_t AttributesImpl::getStackAlignment() const {
// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
-namespace llvm {
- class AttributeListImpl;
-}
-
-static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
-
-namespace llvm {
-static ManagedStatic<sys::SmartMutex<true> > ALMutex;
-
-class AttributeListImpl : public FoldingSetNode {
- sys::cas_flag RefCount;
-
- // AttributesList is uniqued, these should not be publicly available.
- void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
- AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
- ~AttributeListImpl(); // Private implementation
-public:
- SmallVector<AttributeWithIndex, 4> Attrs;
-
- AttributeListImpl(ArrayRef<AttributeWithIndex> attrs)
- : Attrs(attrs.begin(), attrs.end()) {
- RefCount = 0;
- }
-
- void AddRef() {
- sys::SmartScopedLock<true> Lock(*ALMutex);
- ++RefCount;
- }
- void DropRef() {
- sys::SmartScopedLock<true> Lock(*ALMutex);
- if (!AttributesLists.isConstructed())
- return;
- sys::cas_flag new_val = --RefCount;
- if (new_val == 0)
- delete this;
- }
-
- void Profile(FoldingSetNodeID &ID) const {
- Profile(ID, Attrs);
- }
- static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeWithIndex> Attrs){
- for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- ID.AddInteger(Attrs[i].Attrs.Raw());
- ID.AddInteger(Attrs[i].Index);
- }
- }
-};
-
-} // end llvm namespace
-
-AttributeListImpl::~AttributeListImpl() {
- // NOTE: Lock must be acquired by caller.
- AttributesLists->RemoveNode(this);
-}
-
-AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) {
+AttrListPtr AttrListPtr::get(LLVMContext &C,
+ ArrayRef<AttributeWithIndex> Attrs) {
// If there are no attributes then return a null AttributesList pointer.
if (Attrs.empty())
return AttrListPtr();
@@ -425,51 +371,36 @@ AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) {
#endif
// Otherwise, build a key to look up the existing attributes.
+ LLVMContextImpl *pImpl = C.pImpl;
FoldingSetNodeID ID;
AttributeListImpl::Profile(ID, Attrs);
- void *InsertPos;
-
- sys::SmartScopedLock<true> Lock(*ALMutex);
- AttributeListImpl *PAL =
- AttributesLists->FindNodeOrInsertPos(ID, InsertPos);
+ void *InsertPoint;
+ AttributeListImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID,
+ InsertPoint);
// If we didn't find any existing attributes of the same shape then
// create a new one and insert it.
- if (!PAL) {
- PAL = new AttributeListImpl(Attrs);
- AttributesLists->InsertNode(PAL, InsertPos);
+ if (!PA) {
+ PA = new AttributeListImpl(Attrs);
+ pImpl->AttrsLists.InsertNode(PA, InsertPoint);
}
// Return the AttributesList that we found or created.
- return AttrListPtr(PAL);
+ return AttrListPtr(PA);
}
//===----------------------------------------------------------------------===//
// AttrListPtr Method Implementations
//===----------------------------------------------------------------------===//
-AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {
- if (LI) LI->AddRef();
-}
-
-AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
- if (AttrList) AttrList->AddRef();
-}
-
const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
- sys::SmartScopedLock<true> Lock(*ALMutex);
if (AttrList == RHS.AttrList) return *this;
- if (AttrList) AttrList->DropRef();
+
AttrList = RHS.AttrList;
- if (AttrList) AttrList->AddRef();
return *this;
}
-AttrListPtr::~AttrListPtr() {
- if (AttrList) AttrList->DropRef();
-}
-
/// getNumSlots - Return the number of slots used in this attribute list.
/// This is the number of arguments that have an attribute set on them
/// (including the function itself).
@@ -507,6 +438,7 @@ bool AttrListPtr::hasAttrSomewhere(Attributes::AttrVal Attr) const {
for (unsigned i = 0, e = Attrs.size(); i != e; ++i)
if (Attrs[i].Attrs.hasAttribute(Attr))
return true;
+
return false;
}
@@ -562,7 +494,7 @@ AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
OldAttrList.begin()+i, OldAttrList.end());
}
- return get(NewAttrList);
+ return get(C, NewAttrList);
}
AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
@@ -601,7 +533,7 @@ AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
NewAttrList.insert(NewAttrList.end(),
OldAttrList.begin()+i, OldAttrList.end());
- return get(NewAttrList);
+ return get(C, NewAttrList);
}
void AttrListPtr::dump() const {
diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
index b4a0f615f3..5c107e1ebb 100644
--- a/lib/VMCore/AttributesImpl.h
+++ b/lib/VMCore/AttributesImpl.h
@@ -15,12 +15,11 @@
#ifndef LLVM_ATTRIBUTESIMPL_H
#define LLVM_ATTRIBUTESIMPL_H
+#include "llvm/Attributes.h"
#include "llvm/ADT/FoldingSet.h"
namespace llvm {
-class Attributes;
-
class AttributesImpl : public FoldingSetNode {
uint64_t Bits; // FIXME: We will be expanding this.
public:
@@ -46,6 +45,27 @@ public:
}
};
+class AttributeListImpl : public FoldingSetNode {
+ // AttributesList is uniqued, these should not be publicly available.
+ void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
+ AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION;
+public:
+ SmallVector<AttributeWithIndex, 4> Attrs;
+
+ AttributeListImpl(ArrayRef<AttributeWithIndex> attrs)
+ : Attrs(attrs.begin(), attrs.end()) {}
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, Attrs);
+ }
+ static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeWithIndex> Attrs){
+ for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+ ID.AddInteger(Attrs[i].Attrs.Raw());
+ ID.AddInteger(Attrs[i].Index);
+ }
+ }
+};
+
} // end llvm namespace
#endif
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index a4e21e16b3..a4514309b2 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -245,6 +245,33 @@ bool Constant::canTrap() const {
}
}
+/// isThreadDependent - Return true if the value can vary between threads.
+bool Constant::isThreadDependent() const {
+ SmallPtrSet<const Constant*, 64> Visited;
+ SmallVector<const Constant*, 64> WorkList;
+ WorkList.push_back(this);
+ Visited.insert(this);
+
+ while (!WorkList.empty()) {
+ const Constant *C = WorkList.pop_back_val();
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+ if (GV->isThreadLocal())
+ return true;
+ }
+
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
+ const Constant *D = dyn_cast<Constant>(C->getOperand(I));
+ if (!D)
+ continue;
+ if (Visited.insert(D))
+ WorkList.push_back(D);
+ }
+ }
+
+ return false;
+}
+
/// isConstantUsed - Return true if the constant has users other than constant
/// exprs and other dangling things.
bool Constant::isConstantUsed() const {
@@ -1213,6 +1240,19 @@ void ConstantVector::destroyConstant() {
destroyConstantImpl();
}
+/// getSplatValue - If this is a splat vector constant, meaning that all of
+/// the elements have the same value, return that value. Otherwise return 0.
+Constant *Constant::getSplatValue() const {
+ assert(this->getType()->isVectorTy() && "Only valid for vectors!");
+ if (isa<ConstantAggregateZero>(this))
+ return getNullValue(this->getType()->getVectorElementType());
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ return CV->getSplatValue();
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return CV->getSplatValue();
+ return 0;
+}
+
/// getSplatValue - If this is a splat constant, where all of the
/// elements have the same value, return that value. Otherwise return null.
Constant *ConstantVector::getSplatValue() const {
@@ -1225,6 +1265,18 @@ Constant *ConstantVector::getSplatValue() const {
return Elt;
}
+/// If C is a constant integer then return its value, otherwise C must be a
+/// vector of constant integers, all equal, and the common value is returned.
+const APInt &Constant::getUniqueInteger() const {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->getValue();
+ assert(this->getSplatValue() && "Doesn't contain a unique integer!");
+ const Constant *C = this->getAggregateElement(0U);
+ assert(C && isa<ConstantInt>(C) && "Not a vector of numbers!");
+ return cast<ConstantInt>(C)->getValue();
+}
+
+
//---- ConstantPointerNull::get() implementation.
//
@@ -1739,6 +1791,9 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
bool InBounds) {
+ assert(C->getType()->isPtrOrPtrVectorTy() &&
+ "Non-pointer type for constant GetElementPtr expression");
+
if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs))
return FC; // Fold a few common cases.
@@ -1747,15 +1802,22 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
assert(Ty && "GEP indices invalid!");
unsigned AS = C->getType()->getPointerAddressSpace();
Type *ReqTy = Ty->getPointerTo(AS);
+ if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
+ ReqTy = VectorType::get(ReqTy, VecTy->getNumElements());
- assert(C->getType()->isPointerTy() &&
- "Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
ArgVec.reserve(1 + Idxs.size());
ArgVec.push_back(C);
- for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
+ assert(Idxs[i]->getType()->isVectorTy() == ReqTy->isVectorTy() &&
+ "getelementptr index type missmatch");
+ assert((!Idxs[i]->getType()->isVectorTy() ||
+ ReqTy->getVectorNumElements() ==
+ Idxs[i]->getType()->getVectorNumElements()) &&
+ "getelementptr index type missmatch");
ArgVec.push_back(cast<Constant>(Idxs[i]));
+ }
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
InBounds ? GEPOperator::IsInBounds : 0);
@@ -2642,3 +2704,66 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
// Delete the old constant!
destroyConstant();
}
+
+Instruction *ConstantExpr::getAsInstruction() {
+ SmallVector<Value*,4> ValueOperands;
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ ValueOperands.push_back(cast<Value>(I));
+
+ ArrayRef<Value*> Ops(ValueOperands);
+
+ switch (getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return CastInst::Create((Instruction::CastOps)getOpcode(),
+ Ops[0], getType());
+ case Instruction::Select:
+ return SelectInst::Create(Ops[0], Ops[1], Ops[2]);
+ case Instruction::InsertElement:
+ return InsertElementInst::Create(Ops[0], Ops[1], Ops[2]);
+ case Instruction::ExtractElement:
+ return ExtractElementInst::Create(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return InsertValueInst::Create(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ExtractValueInst::Create(Ops[0], getIndices());
+ case Instruction::ShuffleVector:
+ return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
+
+ case Instruction::GetElementPtr:
+ if (cast<GEPOperator>(this)->isInBounds())
+ return GetElementPtrInst::CreateInBounds(Ops[0], Ops.slice(1));
+ else
+ return GetElementPtrInst::Create(Ops[0], Ops.slice(1));
+
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return CmpInst::Create((Instruction::OtherOps)getOpcode(),
+ getPredicate(), Ops[0], Ops[1]);
+
+ default:
+ assert(getNumOperands() == 2 && "Must be binary operator?");
+ BinaryOperator *BO =
+ BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
+ Ops[0], Ops[1]);
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ BO->setHasNoUnsignedWrap(SubclassOptionalData &
+ OverflowingBinaryOperator::NoUnsignedWrap);
+ BO->setHasNoSignedWrap(SubclassOptionalData &
+ OverflowingBinaryOperator::NoSignedWrap);
+ }
+ if (isa<PossiblyExactOperator>(BO))
+ BO->setIsExact(SubclassOptionalData & PossiblyExactOperator::IsExact);
+ return BO;
+ }
+}
diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp
index 19cf0f5cd3..e21868be44 100644
--- a/lib/VMCore/DataLayout.cpp
+++ b/lib/VMCore/DataLayout.cpp
@@ -159,7 +159,7 @@ static int getInt(StringRef R) {
return Result;
}
-void DataLayout::init() {
+void DataLayout::init(StringRef Desc) {
initializeDataLayoutPass(*PassRegistry::getPassRegistry());
LayoutMap = 0;
@@ -180,21 +180,19 @@ void DataLayout::init() {
setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
setPointerAlignment(0, 8, 8, 8);
-}
-std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
+ std::string errMsg = parseSpecifier(Desc);
+ assert(errMsg == "" && "Invalid target data layout string.");
+ (void)errMsg;
+}
- if (td)
- td->init();
+std::string DataLayout::parseSpecifier(StringRef Desc) {
while (!Desc.empty()) {
std::pair<StringRef, StringRef> Split = Desc.split('-');
StringRef Token = Split.first;
Desc = Split.second;
- if (Token.empty())
- continue;
-
Split = Token.split(':');
StringRef Specifier = Split.first;
Token = Split.second;
@@ -203,12 +201,10 @@ std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
switch (Specifier[0]) {
case 'E':
- if (td)
- td->LittleEndian = false;
+ LittleEndian = false;
break;
case 'e':
- if (td)
- td->LittleEndian = true;
+ LittleEndian = true;
break;
case 'p': {
int AddrSpace = 0;
@@ -240,9 +236,8 @@ std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
if (PointerPrefAlignBits == 0)
PointerPrefAlignBits = PointerABIAlignBits;
- if (td)
- td->setPointerAlignment(AddrSpace, PointerABIAlignBits/8,
- PointerPrefAlignBits/8, PointerMemSizeBits/8);
+ setPointerAlignment(AddrSpace, PointerABIAlignBits/8,
+ PointerPrefAlignBits/8, PointerMemSizeBits/8);
break;
}
case 'i':
@@ -284,9 +279,8 @@ std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
unsigned PrefAlign = PrefAlignBits / 8;
if (PrefAlign == 0)
PrefAlign = ABIAlign;
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
- if (td)
- td->setAlignment(AlignType, ABIAlign, PrefAlign, Size);
break;
}
case 'n': // Native integer types.
@@ -297,8 +291,8 @@ std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
return std::string("invalid native integer size \'") +
Specifier.str() + "\', must be a positive integer.";
}
- if (td && Width != 0)
- td->LegalIntWidths.push_back(Width);
+ if (Width != 0)
+ LegalIntWidths.push_back(Width);
Split = Token.split(':');
Specifier = Split.first;
Token = Split.second;
@@ -310,8 +304,7 @@ std::string DataLayout::parseSpecifier(StringRef Desc, DataLayout *td) {
return "invalid natural stack alignment (S-field), "
"must be a positive 8-bit multiple";
}
- if (td)
- td->StackNaturalAlign = StackNaturalAlignBits / 8;
+ StackNaturalAlign = StackNaturalAlignBits / 8;
break;
}
default:
@@ -333,9 +326,7 @@ DataLayout::DataLayout() : ImmutablePass(ID) {
DataLayout::DataLayout(const Module *M)
: ImmutablePass(ID) {
- std::string errMsg = parseSpecifier(M->getDataLayout(), this);
- assert(errMsg == "" && "Module M has malformed data layout string.");
- (void)errMsg;
+ init(M->getDataLayout());
}
void
@@ -543,8 +534,6 @@ uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
case Type::IntegerTyID:
return cast<IntegerType>(Ty)->getBitWidth();
- case Type::VoidTyID:
- return 8;
case Type::HalfTyID:
return 16;
case Type::FloatTyID:
@@ -606,7 +595,6 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
- case Type::VoidTyID:
AlignType = INTEGER_ALIGN;
break;
case Type::HalfTyID:
diff --git a/lib/VMCore/DebugInfo.cpp b/lib/VMCore/DebugInfo.cpp
index 3029ce2734..5eea2ce9e0 100644
--- a/lib/VMCore/DebugInfo.cpp
+++ b/lib/VMCore/DebugInfo.cpp
@@ -68,7 +68,8 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+ if (ConstantInt *CI
+ = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
return CI->getZExtValue();
return 0;
@@ -691,7 +692,7 @@ static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
}
}
-/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
+/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
/// suitable to hold function specific information.
NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
SmallString<32> Name = StringRef("llvm.dbg.lv.");
@@ -720,7 +721,7 @@ NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
if (FName.startswith(StringRef(&One, 1)))
FName = FName.substr(1);
fixupObjcLikeName(FName, Name);
-
+
return M.getOrInsertNamedMetadata(Name.str());
}
@@ -743,7 +744,7 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
SmallVector<Value *, 16> Elts;
// Insert inlined scope as 7th element.
for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
- i == 7 ?
+ i == 7 ?
Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))):
Elts.push_back(DV->getOperand(i));
return DIVariable(MDNode::get(VMContext, Elts));
@@ -757,7 +758,7 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
if (D.isLexicalBlockFile())
return getDISubprogram(DILexicalBlockFile(Scope).getContext());
-
+
if (D.isLexicalBlock())
return getDISubprogram(DILexicalBlock(Scope).getContext());
@@ -793,7 +794,7 @@ bool llvm::isSubprogramContext(const MDNode *Context) {
//===----------------------------------------------------------------------===//
/// processModule - Process entire module and collect debug info.
-void DebugInfoFinder::processModule(Module &M) {
+void DebugInfoFinder::processModule(const Module &M) {
if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
@@ -819,11 +820,12 @@ void DebugInfoFinder::processModule(Module &M) {
}
}
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
- ++BI) {
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ for (Function::const_iterator FI = (*I).begin(), FE = (*I).end();
+ FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+ BI != BE; ++BI) {
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
processDeclare(DDI);
DebugLoc Loc = BI->getDebugLoc();
@@ -927,7 +929,7 @@ void DebugInfoFinder::processSubprogram(DISubprogram SP) {
}
/// processDeclare - Process DbgDeclareInst.
-void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
if (!N) return;
@@ -1065,7 +1067,7 @@ void DIType::printInternal(raw_ostream &OS) const {
<< ", align " << getAlignInBits()
<< ", offset " << getOffsetInBits();
if (isBasicType())
- if (const char *Enc =
+ if (const char *Enc =
dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
OS << ", enc " << Enc;
OS << "]";
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 66379a0493..c0d1feeb9a 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -434,14 +434,14 @@ Instruction *Instruction::clone() const {
New->SubclassOptionalData = SubclassOptionalData;
if (!hasMetadata())
return New;
-
+
// Otherwise, enumerate and copy over metadata from the old instruction to the
// new one.
SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
getAllMetadataOtherThanDebugLoc(TheMDs);
for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
New->setMetadata(TheMDs[i].first, TheMDs[i].second);
-
+
New->setDebugLoc(getDebugLoc());
return New;
}
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 94bd2a1563..ca7cd4eb97 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1353,16 +1353,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
///
template <typename IndexTy>
static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
- if (Ptr->isVectorTy()) {
- assert(IdxList.size() == 1 &&
- "GEP with vector pointers must have a single index");
- PointerType *PTy = dyn_cast<PointerType>(
- cast<VectorType>(Ptr)->getElementType());
- assert(PTy && "Gep with invalid vector pointer found");
- return PTy->getElementType();
- }
-
- PointerType *PTy = dyn_cast<PointerType>(Ptr);
+ PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType());
if (!PTy) return 0; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 74247bdde1..d35d2844b8 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -97,11 +97,18 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy attributes.
for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
- E = AttrsSet.end(); I != E;) {
+ E = AttrsSet.end(); I != E; ) {
FoldingSetIterator<AttributesImpl> Elem = I++;
delete &*Elem;
}
+ // Destroy attribute lists.
+ for (FoldingSetIterator<AttributeListImpl> I = AttrsLists.begin(),
+ E = AttrsLists.end(); I != E; ) {
+ FoldingSetIterator<AttributeListImpl> Elem = I++;
+ delete &*Elem;
+ }
+
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
SmallVector<MDNode*, 8> MDNodes;
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index ee31814c05..d4c28b435a 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -46,7 +46,6 @@ struct DenseMapAPIntKeyInfo {
APInt val;
Type* type;
KeyTy(const APInt& V, Type* Ty) : val(V), type(Ty) {}
- KeyTy(const KeyTy& that) : val(that.val), type(that.type) {}
bool operator==(const KeyTy& that) const {
return type == that.type && this->val == that.val;
}
@@ -71,7 +70,6 @@ struct DenseMapAPFloatKeyInfo {
struct KeyTy {
APFloat val;
KeyTy(const APFloat& V) : val(V){}
- KeyTy(const KeyTy& that) : val(that.val) {}
bool operator==(const KeyTy& that) const {
return this->val.bitwiseIsEqual(that.val);
}
@@ -102,8 +100,6 @@ struct AnonStructTypeKeyInfo {
bool isPacked;
KeyTy(const ArrayRef<Type*>& E, bool P) :
ETypes(E), isPacked(P) {}
- KeyTy(const KeyTy& that) :
- ETypes(that.ETypes), isPacked(that.isPacked) {}
KeyTy(const StructType* ST) :
ETypes(ArrayRef<Type*>(ST->element_begin(), ST->element_end())),
isPacked(ST->isPacked()) {}
@@ -149,10 +145,6 @@ struct FunctionTypeKeyInfo {
bool isVarArg;
KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
ReturnType(R), Params(P), isVarArg(V) {}
- KeyTy(const KeyTy& that) :
- ReturnType(that.ReturnType),
- Params(that.Params),
- isVarArg(that.isVarArg) {}
KeyTy(const FunctionType* FT) :
ReturnType(FT->getReturnType()),
Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
@@ -256,7 +248,8 @@ public:
FPMapTy FPConstants;
FoldingSet<AttributesImpl> AttrsSet;
-
+ FoldingSet<AttributeListImpl> AttrsLists;
+
StringMap<Value*> MDStringCache;
FoldingSet<MDNode> MDNodeSet;
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 53f11499e4..3a8a9e25e3 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -309,6 +309,14 @@ public:
/// whether any of the passes modifies the module, and if so, return true.
bool runOnModule(Module &M);
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
/// Pass Manager itself does not invalidate any analysis info.
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -394,6 +402,14 @@ public:
/// whether any of the passes modifies the module, and if so, return true.
bool run(Module &M);
+ /// doInitialization - Run all of the initializers for the module passes.
+ ///
+ bool doInitialization();
+
+ /// doFinalization - Run all of the finalizers for the module passes.
+ ///
+ bool doFinalization();
+
/// Pass Manager itself does not invalidate any analysis info.
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -1594,6 +1610,29 @@ MPPassManager::runOnModule(Module &M) {
FPP->releaseMemoryOnTheFly();
Changed |= FPP->doFinalization(M);
}
+
+ return Changed;
+}
+
+/// Run all of the initializers for the module passes.
+///
+bool MPPassManager::doInitialization() {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doInitialization();
+
+ return Changed;
+}
+
+/// Run all of the finalizers for the module passes.
+///
+bool MPPassManager::doFinalization() {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+ Changed |= getContainedPass(Index)->doFinalization();
+
return Changed;
}
@@ -1615,6 +1654,18 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
OnTheFlyManagers[P] = FPP;
}
+
+ // If RequiredPass is an analysis pass and it is available then do not
+ // generate the analysis again. Stale analysis info should not be
+ // available at this point.
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());
+ if (PI && PI->isAnalysis() &&
+ FPP->getTopLevelManager()->findAnalysisPass(RequiredPass->getPassID())) {
+ delete RequiredPass;
+ return;
+ }
+
FPP->add(RequiredPass);
// Register P as the last user of RequiredPass.
@@ -1640,6 +1691,25 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
//===----------------------------------------------------------------------===//
// PassManagerImpl implementation
+
+bool PassManagerImpl::doInitialization() {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doInitialization();
+
+ return Changed;
+}
+
+bool PassManagerImpl::doFinalization() {
+ bool Changed = false;
+
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ Changed |= getContainedManager(Index)->doFinalization();
+
+ return Changed;
+}
+
//
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
@@ -1684,6 +1754,18 @@ bool PassManager::run(Module &M) {
return PM->run(M);
}
+/// doInitialization - Run all of the initializers for the module passes.
+///
+bool PassManager::doInitialization() {
+ return PM->doInitialization();
+}
+
+/// doFinalization - Run all of the finalizers for the module passes.
+///
+bool PassManager::doFinalization() {
+ return PM->doFinalization();
+}
+
//===----------------------------------------------------------------------===//
// TimingInfo Class - This class is used to calculate information about the
// amount of time each pass takes to execute. This only happens with
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 1656ab2cab..4d75a7e060 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -629,11 +629,12 @@ StructType *Module::getTypeByName(StringRef Name) const {
Type *CompositeType::getTypeAtIndex(const Value *V) {
if (StructType *STy = dyn_cast<StructType>(this)) {
- unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+ unsigned Idx =
+ (unsigned)cast<Constant>(V)->getUniqueInteger().getZExtValue();
assert(indexValid(Idx) && "Invalid structure index!");
return STy->getElementType(Idx);
}
-
+
return cast<SequentialType>(this)->getElementType();
}
Type *CompositeType::getTypeAtIndex(unsigned Idx) {
@@ -646,15 +647,19 @@ Type *CompositeType::getTypeAtIndex(unsigned Idx) {
}
bool CompositeType::indexValid(const Value *V) const {
if (const StructType *STy = dyn_cast<StructType>(this)) {
- // Structure indexes require 32-bit integer constants.
- if (V->getType()->isIntegerTy(32))
- if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
- return CU->getZExtValue() < STy->getNumElements();
- return false;
+ // Structure indexes require (vectors of) 32-bit integer constants. In the
+ // vector case all of the indices must be equal.
+ if (!V->getType()->getScalarType()->isIntegerTy(32))
+ return false;
+ const Constant *C = dyn_cast<Constant>(V);
+ if (C && V->getType()->isVectorTy())
+ C = C->getSplatValue();
+ const ConstantInt *CU = dyn_cast_or_null<ConstantInt>(C);
+ return CU && CU->getZExtValue() < STy->getNumElements();
}
-
+
// Sequential types can be indexed by any integer.
- return V->getType()->isIntegerTy();
+ return V->getType()->isIntOrIntVectorTy();
}
bool CompositeType::indexValid(unsigned Idx) const {
@@ -717,9 +722,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
}
bool VectorType::isValidElementType(Type *ElemTy) {
- if (PointerType *PTy = dyn_cast<PointerType>(ElemTy))
- ElemTy = PTy->getElementType();
- return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
+ ElemTy->isPointerTy();
}
//===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index eb40b09d29..3782957f3b 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1375,34 +1375,31 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
"GEP base pointer is not a vector or a vector of pointers", &GEP);
Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
"GEP into unsized type!", &GEP);
+ Assert1(GEP.getPointerOperandType()->isVectorTy() ==
+ GEP.getType()->isVectorTy(), "Vector GEP must return a vector value",
+ &GEP);
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- if (GEP.getPointerOperandType()->isPointerTy()) {
- // Validate GEPs with scalar indices.
- Assert2(GEP.getType()->isPointerTy() &&
- cast<PointerType>(GEP.getType())->getElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
- } else {
- // Validate GEPs with a vector index.
- Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
- Value *Index = Idxs[0];
- Type *IndexTy = Index->getType();
- Assert1(IndexTy->isVectorTy(),
- "Vector GEP must have vector indices!", &GEP);
- Assert1(GEP.getType()->isVectorTy(),
- "Vector GEP must return a vector value", &GEP);
- Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
- Assert1(ElemPtr->isPointerTy(),
- "Vector GEP pointer operand is not a pointer!", &GEP);
- unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
- unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
- Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
- Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
- "Vector GEP type does not match pointer type!", &GEP);
+ Assert2(GEP.getType()->getScalarType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType()->getScalarType())->getElementType()
+ == ElTy, "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isVectorTy()) {
+ // Additional checks for vector GEPs.
+ unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
+ Assert1(GepWidth == GEP.getType()->getVectorNumElements(),
+ "Vector GEP result width doesn't match operand's", &GEP);
+ for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
+ Type *IndexTy = Idxs[i]->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ unsigned IndexWidth = IndexTy->getVectorNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ }
}
visitInstruction(GEP);
}
diff --git a/projects/sample/Makefile.llvm.config.in b/projects/sample/Makefile.llvm.config.in
index 9a85b3df63..c7df998b26 100644
--- a/projects/sample/Makefile.llvm.config.in
+++ b/projects/sample/Makefile.llvm.config.in
@@ -184,6 +184,12 @@ RDYNAMIC := @RDYNAMIC@
#ENABLE_LIBCPP = 0
ENABLE_LIBCPP = @ENABLE_LIBCPP@
+# When ENABLE_CXX11 is enabled, LLVM uses c++11 mode by default to build.
+ENABLE_CXX11 = @ENABLE_CXX11@
+
+# When ENABLE_WERROR is enabled, we'll pass -Werror on the command line
+ENABLE_WERROR = @ENABLE_WERROR@
+
# When ENABLE_OPTIMIZED is enabled, LLVM code is optimized and output is put
# into the "Release" directories. Otherwise, LLVM code is not optimized and
# output is put in the "Debug" directories.
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index 8012c23412..1763ea2696 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -380,6 +380,18 @@ case "$enableval" in
*) AC_MSG_ERROR([Invalid setting for --enable-libcpp. Use "yes" or "no"]) ;;
esac
+dnl --enable-cxx11 : check whether or not to use -std=c++11 on the command line
+AC_ARG_ENABLE(cxx11,
+ AS_HELP_STRING([--enable-cxx11],
+ [Use c++11 if available (default is NO)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_CXX11,[1]) ;;
+ no) AC_SUBST(ENABLE_CXX11,[0]) ;;
+ default) AC_SUBST(ENABLE_CXX11,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-cxx11. Use "yes" or "no"]) ;;
+esac
+
dnl --enable-optimized : check whether they want to do an optimized build:
AC_ARG_ENABLE(optimized, AS_HELP_STRING(
--enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
@@ -407,6 +419,16 @@ else
AC_SUBST(DISABLE_ASSERTIONS,[[DISABLE_ASSERTIONS=1]])
fi
+dnl --enable-werror : check whether we want Werror on by default
+AC_ARG_ENABLE(werror,AS_HELP_STRING(
+ --enable-werror,[Compile with -Werror enabled (default is NO)]),, enableval="no")
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_WERROR,[1]) ;;
+ no) AC_SUBST(ENABLE_WERROR,[0]) ;;
+ default) AC_SUBST(ENABLE_WERROR,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-werror. Use "yes" or "no"]) ;;
+esac
+
dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks:
AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING(
--enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
@@ -574,7 +596,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -583,7 +605,6 @@ case "$enableval" in
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -598,7 +619,6 @@ case "$enableval" in
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
diff --git a/projects/sample/configure b/projects/sample/configure
index 3baa1a7e16..6c622e573d 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -683,9 +683,11 @@ BUILD_EXEEXT
BUILD_CXX
CVSBUILD
ENABLE_LIBCPP
+ENABLE_CXX11
ENABLE_OPTIMIZED
ENABLE_PROFILING
DISABLE_ASSERTIONS
+ENABLE_WERROR
ENABLE_EXPENSIVE_CHECKS
EXPENSIVE_CHECKS
DEBUG_RUNTIME
@@ -1375,10 +1377,12 @@ Optional Features:
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--enable-polly Use polly if available (default is YES)
--enable-libcpp Use libc++ if available (default is NO)
+ --enable-cxx11 Use c++11 if available (default is NO)
--enable-optimized Compile with optimizations enabled (default is NO)
--enable-profiling Compile with profiling enabled (default is NO)
--enable-assertions Compile with assertion checks enabled (default is
YES)
+ --enable-werror Compile with -Werror enabled (default is NO)
--enable-expensive-checks
Compile with expensive debug checks enabled (default
is NO)
@@ -4947,6 +4951,25 @@ echo "$as_me: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\""
{ (exit 1); exit 1; }; } ;;
esac
+# Check whether --enable-cxx11 was given.
+if test "${enable_cxx11+set}" = set; then
+ enableval=$enable_cxx11;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_CXX11=1
+ ;;
+ no) ENABLE_CXX11=0
+ ;;
+ default) ENABLE_CXX11=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
# Check whether --enable-optimized was given.
if test "${enable_optimized+set}" = set; then
enableval=$enable_optimized;
@@ -4992,6 +5015,25 @@ else
fi
+# Check whether --enable-werror was given.
+if test "${enable_werror+set}" = set; then
+ enableval=$enable_werror;
+else
+ enableval="no"
+fi
+
+case "$enableval" in
+ yes) ENABLE_WERROR=1
+ ;;
+ no) ENABLE_WERROR=0
+ ;;
+ default) ENABLE_WERROR=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-werror. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-werror. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
# Check whether --enable-expensive-checks was given.
if test "${enable_expensive_checks+set}" = set; then
enableval=$enable_expensive_checks;
@@ -5260,7 +5302,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5269,7 +5311,6 @@ case "$enableval" in
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
- spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -5284,7 +5325,6 @@ case "$enableval" in
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
- CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
@@ -10313,7 +10353,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10311 "configure"
+#line 10351 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -21646,9 +21686,11 @@ BUILD_EXEEXT!$BUILD_EXEEXT$ac_delim
BUILD_CXX!$BUILD_CXX$ac_delim
CVSBUILD!$CVSBUILD$ac_delim
ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim
+ENABLE_CXX11!$ENABLE_CXX11$ac_delim
ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim
ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim
DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
+ENABLE_WERROR!$ENABLE_WERROR$ac_delim
ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
@@ -21658,8 +21700,6 @@ TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
ENABLE_THREADS!$ENABLE_THREADS$ac_delim
-ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
-ENABLE_PIC!$ENABLE_PIC$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -21701,6 +21741,8 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
+ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
@@ -21792,7 +21834,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 89; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 91; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/test/Analysis/BasicAA/phi-spec-order.ll b/test/Analysis/BasicAA/phi-spec-order.ll
new file mode 100644
index 0000000000..27d47bcd5b
--- /dev/null
+++ b/test/Analysis/BasicAA/phi-spec-order.ll
@@ -0,0 +1,71 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+@X = external global [16000 x double], align 32
+@Y = external global [16000 x double], align 32
+
+define signext i32 @s000() nounwind {
+entry:
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.end, %entry
+ %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %for.cond2.preheader
+ %lsr.iv4 = phi [16000 x double]* [ %i11, %for.body4 ], [ bitcast (double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 8)
+ to [16000 x double]*), %for.cond2.preheader ]
+ %lsr.iv1 = phi [16000 x double]* [ %i10, %for.body4 ], [ @X, %for.cond2.preheader ]
+
+; CHECK: NoAlias:{{[ \t]+}}[16000 x double]* %lsr.iv1, [16000 x double]* %lsr.iv4
+
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
+ %lsr.iv46 = bitcast [16000 x double]* %lsr.iv4 to <4 x double>*
+ %lsr.iv12 = bitcast [16000 x double]* %lsr.iv1 to <4 x double>*
+ %scevgep11 = getelementptr <4 x double>* %lsr.iv46, i64 -2
+ %i6 = load <4 x double>* %scevgep11, align 32, !tbaa !0
+ %add = fadd <4 x double> %i6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ store <4 x double> %add, <4 x double>* %lsr.iv12, align 32, !tbaa !0
+ %scevgep10 = getelementptr <4 x double>* %lsr.iv46, i64 -1
+ %i7 = load <4 x double>* %scevgep10, align 32, !tbaa !0
+ %add.4 = fadd <4 x double> %i7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %scevgep9 = getelementptr <4 x double>* %lsr.iv12, i64 1
+ store <4 x double> %add.4, <4 x double>* %scevgep9, align 32, !tbaa !0
+ %i8 = load <4 x double>* %lsr.iv46, align 32, !tbaa !0
+ %add.8 = fadd <4 x double> %i8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %scevgep8 = getelementptr <4 x double>* %lsr.iv12, i64 2
+ store <4 x double> %add.8, <4 x double>* %scevgep8, align 32, !tbaa !0
+ %scevgep7 = getelementptr <4 x double>* %lsr.iv46, i64 1
+ %i9 = load <4 x double>* %scevgep7, align 32, !tbaa !0
+ %add.12 = fadd <4 x double> %i9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %scevgep3 = getelementptr <4 x double>* %lsr.iv12, i64 3
+ store <4 x double> %add.12, <4 x double>* %scevgep3, align 32, !tbaa !0
+
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep11, <4 x double>* %scevgep7
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep10, <4 x double>* %scevgep7
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep7, <4 x double>* %scevgep9
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep11, <4 x double>* %scevgep3
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep10, <4 x double>* %scevgep3
+; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep3, <4 x double>* %scevgep9
+
+ %lsr.iv.next = add i32 %lsr.iv, -16
+ %scevgep = getelementptr [16000 x double]* %lsr.iv1, i64 0, i64 16
+ %i10 = bitcast double* %scevgep to [16000 x double]*
+ %scevgep5 = getelementptr [16000 x double]* %lsr.iv4, i64 0, i64 16
+ %i11 = bitcast double* %scevgep5 to [16000 x double]*
+ %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond.15, label %for.end, label %for.body4
+
+for.end: ; preds = %for.body4
+ %inc9 = add nsw i32 %nl.018, 1
+ %exitcond = icmp eq i32 %inc9, 400000
+ br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10: ; preds = %for.end
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index 8865ee9401..bc6de425a3 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -7,13 +7,20 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 1; i <= 10; i++)
;; for (long int j = 1; j <= 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 1];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 1];
define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [<= <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 1, %entry ], [ %inc8, %for.inc7 ]
@@ -31,7 +38,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -1
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [<= <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -51,14 +57,21 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 1; i <= n; i++)
;; for (long int j = 1; j <= m; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 1];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 1];
define void @banerjee1(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
%cmp4 = icmp sgt i64 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [* <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *|<]!
+
for.cond1.preheader.preheader: ; preds = %entry
%0 = add i64 %n, 1
br label %for.cond1.preheader
@@ -85,7 +98,6 @@ for.body3: ; preds = %for.body3.preheader
%sub = add nsw i64 %add5, -1
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%2 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [* <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.12, i64 1
store i64 %2, i64* %B.addr.12, align 8
%inc = add nsw i64 %j.03, 1
@@ -119,6 +131,13 @@ define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -136,7 +155,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 100
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -156,13 +174,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 99];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 99];
define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [> >]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -180,7 +205,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 99
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [> >]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -200,13 +224,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 100];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 100];
define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -224,7 +255,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -100
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -244,13 +274,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j - 99];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j - 99];
define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [< <]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -268,7 +305,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -99
%arrayidx6 = getelementptr inbounds i64* %A, i64 %sub
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [< <]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -288,13 +324,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 9];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 9];
define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [=> <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -312,7 +355,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 9
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [=> <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -332,13 +374,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 10];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 10];
define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [> <=]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -356,7 +405,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 10
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [> <=]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -376,13 +424,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 10; i++)
;; for (long int j = 0; j < 10; j++) {
-;; A[10*i + j] = ...
-;; ... = A[10*i + j + 11];
+;; A[10*i + j] = 0;
+;; *B++ = A[10*i + j + 11];
define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [> <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -400,7 +455,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %add5, 11
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%0 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [> <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -420,13 +474,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[30*i + 500*j] = ...
-;; ... = A[i - 500*j + 11];
+;; A[30*i + 500*j] = 0;
+;; *B++ = A[i - 500*j + 11];
define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [<= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -445,7 +506,6 @@ for.body3: ; preds = %for.cond1.preheader
%add6 = add nsw i64 %sub, 11
%arrayidx7 = getelementptr inbounds i64* %A, i64 %add6
%1 = load i64* %arrayidx7, align 8
-; CHECK: da analyze - flow [<= =|<]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %1, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -465,13 +525,20 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[i + 500*j] = ...
-;; ... = A[i - 500*j + 11];
+;; A[i + 500*j] = 0;
+;; *B++ = A[i - 500*j + 11];
define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [<> =]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -489,7 +556,6 @@ for.body3: ; preds = %for.cond1.preheader
%add5 = add nsw i64 %sub, 11
%arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
%1 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [<> =]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %1, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -509,13 +575,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[300*i + j] = ...
-;; ... = A[250*i - j + 11];
+;; A[300*i + j] = 0;
+;; *B++ = A[250*i - j + 11];
define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [<= <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -533,7 +606,6 @@ for.body3: ; preds = %for.cond1.preheader
%add5 = add nsw i64 %sub, 11
%arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [<= <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
@@ -553,13 +625,20 @@ for.end9: ; preds = %for.inc7
;; for (long int i = 0; i < 20; i++)
;; for (long int j = 0; j < 20; j++) {
-;; A[100*i + j] = ...
-;; ... = A[100*i - j + 11];
+;; A[100*i + j] = 0;
+;; *B++ = A[100*i - j + 11];
define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [= <>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -577,7 +656,6 @@ for.body3: ; preds = %for.cond1.preheader
%add5 = add nsw i64 %sub, 11
%arrayidx6 = getelementptr inbounds i64* %A, i64 %add5
%0 = load i64* %arrayidx6, align 8
-; CHECK: da analyze - flow [= <>]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.11, i64 1
store i64 %0, i64* %B.addr.11, align 8
%inc = add nsw i64 %j.02, 1
diff --git a/test/Analysis/DependenceAnalysis/Coupled.ll b/test/Analysis/DependenceAnalysis/Coupled.ll
index 60163fe7c2..a5989fb88e 100644
--- a/test/Analysis/DependenceAnalysis/Coupled.ll
+++ b/test/Analysis/DependenceAnalysis/Coupled.ll
@@ -5,15 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long int i = 0; i < 50; i++)
-;; A[i][i] = ...
-;; ... = A[i + 10][i + 9]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][i] = i;
+;; *B++ = A[i + 10][i + 9];
define void @couple0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -23,27 +30,33 @@ for.body: ; preds = %for.body, %entry
%add2 = add nsw i64 %i.02, 10
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[i][i] = ...
-;; ... = A[i + 9][i + 9]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][i] = i;
+;; *B++ = A[i + 9][i + 9];
define void @couple1([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [-9]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -53,27 +66,33 @@ for.body: ; preds = %for.body, %entry
%add2 = add nsw i64 %i.02, 9
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %add2, i64 %add
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - consistent flow [-9]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - 6][3*i - 6] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - 6][3*i - 6] = i;
+;; *B++ = A[i][i];
define void @couple2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -85,27 +104,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - 6][3*i - 5] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - 6][3*i - 5] = i;
+;; *B++ = A[i][i];
define void @couple3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -117,27 +142,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx5 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - 6][3*i - n] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - 6][3*i - n] = i;
+;; *B++ = A[i][i];
define void @couple4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -150,27 +181,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx4, align 4
%arrayidx6 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[3*i - n + 1][3*i - n] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[3*i - n + 1][3*i - n] = i;
+;; *B++ = A[i][i];
define void @couple5([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -185,27 +222,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[i][3*i - 6] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][3*i - 6] = i;
+;; *B++ = A[i][i];
define void @couple6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -215,27 +258,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx1, align 4
%arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - flow [=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 50; i++)
-;; A[i][3*i - 5] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 50; i++) {
+;; A[i][3*i - 5] = i;
+;; *B++ = A[i][i];
define void @couple7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -245,27 +294,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx1, align 4
%arrayidx3 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 50
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][3 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][3 - i] = i;
+;; *B++ = A[i][i];
define void @couple8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -276,27 +331,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][2 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][2 - i] = i;
+;; *B++ = A[i][i];
define void @couple9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -307,27 +368,34 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][6 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][6 - i] = i;
+;; *B++ = A[i][i];
define void @couple10([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 3!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -338,28 +406,34 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [>] splitable!
-; CHECK: da analyze - split level = 1, iteration = 3!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 15; i++)
-;; A[3*i - 18][18 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 15; i++) {
+;; A[3*i - 18][18 - i] = i;
+;; *B++ = A[i][i];
define void @couple11([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -370,28 +444,34 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [=|<] splitable!
-; CHECK: da analyze - split level = 1, iteration = 9!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 16
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 16
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i <= 12; i++)
-;; A[3*i - 18][22 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i <= 12; i++) {
+;; A[3*i - 18][22 - i] = i;
+;; *B++ = A[i][i];
define void @couple12([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -402,28 +482,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [<] splitable!
-; CHECK: da analyze - split level = 1, iteration = 11!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 13
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 13
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 12; i++)
-;; A[3*i - 18][22 - i] = ...
-;; ... = A[i][i]
+;; for (long int i = 0; i < 12; i++) {
+;; A[3*i - 18][22 - i] = i;
+;; *B++ = A[i][i];
define void @couple13([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -434,27 +519,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx2, align 4
%arrayidx4 = getelementptr inbounds [100 x i32]* %A, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 12
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 12
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-
-;; for (long int i = 0; i < 100; i++)
-;; A[3*i - 18][18 - i][i] = ...
-;; ... = A[i][i][i]
+;; for (long int i = 0; i < 100; i++) {
+;; A[3*i - 18][18 - i][i] = i;
+;; *B++ = A[i][i][i];
define void @couple14([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = 9!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -465,28 +556,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - flow [=|<] splitable!
-; CHECK: da analyze - split level = 1, iteration = 9!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 100
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long int i = 0; i < 100; i++)
-;; A[3*i - 18][22 - i][i] = ...
-;; ... = A[i][i][i]
+;; for (long int i = 0; i < 100; i++) {
+;; A[3*i - 18][22 - i][i] = i;
+;; *B++ = A[i][i][i];
define void @couple15([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -497,12 +593,11 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx3, align 4
%arrayidx6 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add nsw i64 %i.02, 1
- %cmp = icmp slt i64 %inc, 100
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
diff --git a/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
index aa5d254a0c..8120739cc5 100644
--- a/test/Analysis/DependenceAnalysis/ExactRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
@@ -6,15 +6,22 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 10; i++)
-;; A[4*i + 10] = ...
+;; A[4*i + 10] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[2*j + 1];
+;; *B++ = A[2*j + 1];
define void @rdiv0(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 2
@@ -22,22 +29,24 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond5 = icmp ne i64 %inc, 10
+ br i1 %exitcond5, label %for.body, label %for.body4.preheader
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul5 = shl nsw i64 %j.02, 1
%add64 = or i64 %mul5, 1
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add64
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc9 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc9, 10
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond = icmp ne i64 %inc9, 10
+ br i1 %exitcond, label %for.body4, label %for.end10
for.end10: ; preds = %for.body4
ret void
@@ -45,15 +54,22 @@ for.end10: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv1(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -61,20 +77,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -82,15 +100,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv2(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -98,20 +123,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -119,15 +146,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv3(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -135,20 +169,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -156,15 +192,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[11*i - 45] = ...
+;; A[11*i - 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[j];
+;; *B++ = A[j];
define void @rdiv4(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, 11
@@ -172,20 +215,22 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%arrayidx5 = getelementptr inbounds i32* %A, i64 %j.02
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -193,15 +238,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv5(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -209,21 +261,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -231,15 +285,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j < 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv6(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -247,21 +308,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 10
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 10
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -269,15 +332,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv7(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -285,21 +355,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 5
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
+
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -307,15 +379,22 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i <= 5; i++)
-;; A[-11*i + 45] = ...
+;; A[-11*i + 45] = i;
;; for (long int j = 0; j <= 10; j++)
-;; ... = A[-j];
+;; *B++ = A[-j];
define void @rdiv8(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -11
@@ -323,21 +402,23 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.body4
+ %exitcond4 = icmp ne i64 %inc, 6
+ br i1 %exitcond4, label %for.body, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.body
+ br label %for.body4
-for.body4: ; preds = %for.body4, %for.body
- %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body ]
- %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub = sub nsw i64 0, %j.02
%arrayidx5 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc7 = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc7, 11
- br i1 %cmp2, label %for.body4, label %for.end8
+ %exitcond = icmp ne i64 %inc7, 11
+ br i1 %exitcond, label %for.body4, label %for.end8
for.end8: ; preds = %for.body4
ret void
@@ -345,20 +426,27 @@ for.end8: ; preds = %for.body4
;; for (long int i = 0; i < 5; i++)
-;; for (long int j = 0; j < 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+;; for (long int j = 0; j < 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv9(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -368,38 +456,46 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 10
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 10
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 5
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 5
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}
-;; for (long int i = 0; i < 5; i++)
-;; for (long int j = 0; j <= 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+
+;; for (long int i = 0; i <= 5; i++)
+;; for (long int j = 0; j < 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv10(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -409,38 +505,45 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 10
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 10
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 6
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 6
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}
-;; for (long int i = 0; i <= 5; i++)
-;; for (long int j = 0; j <= 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+;; for (long int i = 0; i < 5; i++)
+;; for (long int j = 0; j <= 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv11(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -450,38 +553,45 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 11
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 11
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 5
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 5
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
}
-;; for (long int i = 0; i < 5; i++)
-;; for (long int j = 0; j < 10; j++)
-;; A[11*i - j] = ...
-;; ... = A[45];
+;; for (long int i = 0; i <= 5; i++)
+;; for (long int j = 0; j <= 10; j++) {
+;; A[11*i - j] = i;
+;; *B++ = A[45];
define void @rdiv12(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc5 ]
+; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - flow [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc5
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -491,17 +601,17 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i64 45
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - flow [* *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 11
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body3, label %for.inc5
for.inc5: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 11
%inc6 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc6, 6
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond5 = icmp ne i64 %inc6, 6
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end7
for.end7: ; preds = %for.inc5
ret void
diff --git a/test/Analysis/DependenceAnalysis/ExactSIV.ll b/test/Analysis/DependenceAnalysis/ExactSIV.ll
index 71e0502462..7485034108 100644
--- a/test/Analysis/DependenceAnalysis/ExactSIV.ll
+++ b/test/Analysis/DependenceAnalysis/ExactSIV.ll
@@ -6,14 +6,21 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[i + 10] = ...
-;; ... = A[2*i + 1];
+;; A[i + 10] = i;
+;; *B++ = A[2*i + 1];
define void @exact0(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -24,12 +31,11 @@ for.body: ; preds = %for.body, %entry
%add13 = or i64 %mul, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add13
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -37,14 +43,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[4*i + 10] = ...
-;; ... = A[2*i + 1];
+;; A[4*i + 10] = i;
+;; *B++ = A[2*i + 1];
define void @exact1(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -56,12 +69,11 @@ for.body: ; preds = %for.body, %entry
%add23 = or i64 %mul1, 1
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add23
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -69,14 +81,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact2(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -86,12 +105,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -99,14 +117,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 10; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact3(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -116,12 +141,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 11
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -129,14 +153,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 12; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact4(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -146,12 +177,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 12
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 12
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -159,14 +189,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 12; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact5(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -176,12 +213,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 13
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 13
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -189,14 +225,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 18; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact6(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -206,12 +249,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 18
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 18
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -219,14 +261,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 18; i++) {
-;; A[6*i] = ...
-;; ... = A[i + 60];
+;; A[6*i] = i;
+;; *B++ = A[i + 60];
define void @exact7(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -236,12 +285,11 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %i.02, 60
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 19
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 19
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -249,14 +297,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 10; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact8(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -266,12 +321,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -279,14 +333,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 10; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact9(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -296,12 +357,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 11
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 11
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -309,14 +369,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 12; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact10(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -326,12 +393,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [>]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 12
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 12
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -339,14 +405,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 12; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact11(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -356,12 +429,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 13
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 13
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -369,14 +441,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i < 18; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact12(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=>|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -386,12 +465,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [=>|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 18
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 18
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
@@ -399,14 +477,21 @@ for.end: ; preds = %for.body
;; for (long unsigned i = 0; i <= 18; i++) {
-;; A[-6*i] = ...
-;; ... = A[-i - 60];
+;; A[-6*i] = i;
+;; *B++ = A[-i - 60];
define void @exact13(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -416,12 +501,11 @@ for.body: ; preds = %for.body, %entry
%sub1 = sub i64 -60, %i.02
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 19
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 19
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index 94c93a8a0d..6bad8ae9a1 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -6,14 +6,21 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i - 4*j] = ...
-;; ... = A[6*i + 8*j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i - 4*j] = i;
+;; *B++ = A[6*i + 8*j];
define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [=> *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc8
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
@@ -33,7 +40,6 @@ for.body3: ; preds = %for.cond1.preheader
%add = add nsw i64 %mul5, %mul6
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - flow [=> *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -52,14 +58,21 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i - 4*j] = ...
-;; ... = A[6*i + 8*j + 1];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i - 4*j] = i;
+;; *B++ = A[6*i + 8*j + 1];
define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc9
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
@@ -80,7 +93,6 @@ for.body3: ; preds = %for.cond1.preheader
%add7 = or i64 %add, 1
%arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -99,14 +111,21 @@ for.end11: ; preds = %for.inc9
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i - 4*j + 1] = ...
-;; ... = A[6*i + 8*j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i - 4*j + 1] = i;
+;; *B++ = A[6*i + 8*j];
define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc9
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
@@ -127,7 +146,6 @@ for.body3: ; preds = %for.cond1.preheader
%add7 = add nsw i64 %mul5, %mul6
%arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -146,14 +164,21 @@ for.end11: ; preds = %for.inc9
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i + 2*j] = ...
-;; ... = A[i + 2*j - 1];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i + 2*j] = i;
+;; *B++ = A[i + 2*j - 1];
define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [<> *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc7
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc8, %for.inc7 ]
@@ -172,7 +197,6 @@ for.body3: ; preds = %for.cond1.preheader
%sub = add nsw i64 %add5, -1
%arrayidx6 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - flow [<> *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -190,16 +214,22 @@ for.end9: ; preds = %for.inc7
}
-;; void gcd4(int *A, int *B, long int M, long int N) {
-;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++) {
-;; A[5*i + 10*j*M + 9*M*N] = i;
-;; *B++ = A[15*i + 20*j*M - 21*N*M + 4];
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++) {
+;; A[5*i + 10*j*M + 9*M*N] = i;
+;; *B++ = A[15*i + 20*j*M - 21*N*M + 4];
define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc17
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
@@ -228,7 +258,6 @@ for.body3: ; preds = %for.cond1.preheader
%add15 = add nsw i64 %sub, 4
%arrayidx16 = getelementptr inbounds i32* %A, i64 %add15
%0 = load i32* %arrayidx16, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -246,16 +275,22 @@ for.end19: ; preds = %for.inc17
}
-;; void gcd5(int *A, int *B, long int M, long int N) {
-;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++) {
-;; A[5*i + 10*j*M + 9*M*N] = i;
-;; *B++ = A[15*i + 20*j*M - 21*N*M + 5];
+;; for (long int i = 0; i < 100; i++)
+;; for (long int j = 0; j < 100; j++) {
+;; A[5*i + 10*j*M + 9*M*N] = i;
+;; *B++ = A[15*i + 20*j*M - 21*N*M + 5];
define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [<> *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
for.cond1.preheader: ; preds = %entry, %for.inc17
%B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc18, %for.inc17 ]
@@ -284,7 +319,6 @@ for.body3: ; preds = %for.cond1.preheader
%add15 = add nsw i64 %sub, 5
%arrayidx16 = getelementptr inbounds i32* %A, i64 %add15
%0 = load i32* %arrayidx16, align 4
-; CHECK: da analyze - flow [<> *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
@@ -302,17 +336,23 @@ for.end19: ; preds = %for.inc17
}
-;; void gcd6(long int n, int A[][n], int *B) {
-;; for (long int i = 0; i < n; i++)
-;; for (long int j = 0; j < n; j++) {
-;; A[2*i][4*j] = i;
-;; *B++ = A[8*i][6*j + 1];
+;; for (long int i = 0; i < n; i++)
+;; for (long int j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
define void @gcd6(i64 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
%cmp4 = icmp sgt i64 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *|<]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -342,7 +382,6 @@ for.body3: ; preds = %for.body3.preheader
%arrayidx8.sum = add i64 %1, %add7
%arrayidx9 = getelementptr inbounds i32* %A, i64 %arrayidx8.sum
%2 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %2, i32* %B.addr.12, align 4
%inc = add nsw i64 %j.03, 1
@@ -367,11 +406,10 @@ for.end12: ; preds = %for.end12.loopexit,
}
-;; void gcd7(int n, int A[][n], int *B) {
-;; for (int i = 0; i < n; i++)
-;; for (int j = 0; j < n; j++) {
-;; A[2*i][4*j] = i;
-;; *B++ = A[8*i][6*j + 1];
+;; for (int i = 0; i < n; i++)
+;; for (int j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
define void @gcd7(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
@@ -379,6 +417,13 @@ entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *|<]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -419,7 +464,6 @@ for.body3: ; preds = %for.body3.preheader
%arrayidx11.sum = add i64 %10, %idxprom8
%arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum
%11 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - flow [* *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %11, i32* %B.addr.12, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -446,17 +490,23 @@ for.end15: ; preds = %for.end15.loopexit,
}
-;; void gcd8(int n, int *A, int *B) {
-;; for (int i = 0; i < n; i++)
-;; for (int j = 0; j < n; j++) {
-;; A[n*2*i + 4*j] = i;
-;; *B++ = A[n*8*i + 6*j + 1];
+;; for (int i = 0; i < n; i++)
+;; for (int j = 0; j < n; j++) {
+;; A[n*2*i + 4*j] = i;
+;; *B++ = A[n*8*i + 6*j + 1];
define void @gcd8(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *|<]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -492,7 +542,6 @@ for.body3: ; preds = %for.body3.preheader
%idxprom11 = sext i32 %add10 to i64
%arrayidx12 = getelementptr inbounds i32* %A, i64 %idxprom11
%5 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %5, i32* %B.addr.12, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
@@ -518,11 +567,10 @@ for.end15: ; preds = %for.end15.loopexit,
}
-;; void gcd9(unsigned n, int A[][n], int *B) {
-;; for (unsigned i = 0; i < n; i++)
-;; for (unsigned j = 0; j < n; j++) {
-;; A[2*i][4*j] = i;
-;; *B++ = A[8*i][6*j + 1];
+;; for (unsigned i = 0; i < n; i++)
+;; for (unsigned j = 0; j < n; j++) {
+;; A[2*i][4*j] = i;
+;; *B++ = A[8*i][6*j + 1];
define void @gcd9(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
@@ -530,6 +578,13 @@ entry:
%cmp4 = icmp eq i32 %n, 0
br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - flow [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *|<]!
+
for.cond1.preheader.preheader: ; preds = %entry
br label %for.cond1.preheader
@@ -570,7 +625,6 @@ for.body3: ; preds = %for.body3.preheader
%arrayidx11.sum = add i64 %10, %idxprom8
%arrayidx12 = getelementptr inbounds i32* %A, i64 %arrayidx11.sum
%11 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - flow [* *|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %11, i32* %B.addr.12, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/test/Analysis/DependenceAnalysis/Preliminary.ll b/test/Analysis/DependenceAnalysis/Preliminary.ll
index 3ef63fd559..97589db300 100644
--- a/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -1,111 +1,147 @@
-; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s
-
-; This series of tests is more interesting when debugging is enabled.
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
; ModuleID = 'Preliminary.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.6.0"
-;; may alias
-;; int p0(int n, int *A, int *B) {
+;;int p0(int n, int *A, int *B) {
;; A[0] = n;
;; return B[1];
define i32 @p0(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
entry:
store i32 %n, i32* %A, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+
%arrayidx1 = getelementptr inbounds i32* %B, i64 1
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - confused!
ret i32 %0
}
-;; no alias
-;; int p1(int n, int *restrict A, int *restrict B) {
+;;int p1(int n, int *restrict A, int *restrict B) {
;; A[0] = n;
;; return B[1];
define i32 @p1(i32 %n, i32* noalias %A, i32* noalias %B) nounwind uwtable ssp {
entry:
store i32 %n, i32* %A, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent input!
+
%arrayidx1 = getelementptr inbounds i32* %B, i64 1
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
ret i32 %0
}
-;; check loop nesting levels
-;; for (long int i = 0; i < n; i++)
-;; for (long int j = 0; j < n; j++)
-;; for (long int k = 0; k < n; k++)
-;; A[i][j][k] = ...
-;; for (long int k = 0; k < n; k++)
-;; ... = A[i + 3][j + 2][k + 1];
+
+;; for (long int i = 0; i < n; i++) {
+;; for (long int j = 0; j < n; j++) {
+;; for (long int k = 0; k < n; k++) {
+;; A[i][j][k] = i;
+;; }
+;; for (long int k = 0; k < n; k++) {
+;; *B++ = A[i + 3][j + 2][k + 1];
define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
entry:
%cmp10 = icmp sgt i64 %n, 0
- br i1 %cmp10, label %for.cond1.preheader, label %for.end26
+ br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
-for.cond1.preheader: ; preds = %for.inc24, %entry
- %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %entry ]
- %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %entry ]
+; CHECK: da analyze - consistent output [0 0 0|<]!
+; CHECK: da analyze - flow [-3 -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * *|<]!
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
+ %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
%cmp26 = icmp sgt i64 %n, 0
- br i1 %cmp26, label %for.cond4.preheader, label %for.inc24
+ br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
-for.cond4.preheader: ; preds = %for.inc21, %for.cond1.preheader
- %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond1.preheader ]
- %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond1.preheader ]
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
+ %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
+ %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
%cmp51 = icmp sgt i64 %n, 0
- br i1 %cmp51, label %for.body6, label %for.cond10.loopexit
+ br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader: ; preds = %for.cond4.preheader
+ br label %for.body6
-for.body6: ; preds = %for.body6, %for.cond4.preheader
- %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.cond4.preheader ]
+for.body6: ; preds = %for.body6.preheader, %for.body6
+ %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
store i64 %i.011, i64* %arrayidx8, align 8
%inc = add nsw i64 %k.02, 1
- %cmp5 = icmp slt i64 %inc, %n
- br i1 %cmp5, label %for.body6, label %for.cond10.loopexit
+ %exitcond13 = icmp ne i64 %inc, %n
+ br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
-for.cond10.loopexit: ; preds = %for.body6, %for.cond4.preheader
+for.cond10.loopexit.loopexit: ; preds = %for.body6
+ br label %for.cond10.loopexit
+
+for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
%cmp113 = icmp sgt i64 %n, 0
- br i1 %cmp113, label %for.body12, label %for.inc21
+ br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader: ; preds = %for.cond10.loopexit
+ br label %for.body12
-for.body12: ; preds = %for.body12, %for.cond10.loopexit
- %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.cond10.loopexit ]
- %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.cond10.loopexit ]
+for.body12: ; preds = %for.body12.preheader, %for.body12
+ %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+ %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
%add = add nsw i64 %k9.05, 1
%add13 = add nsw i64 %j.07, 2
%add14 = add nsw i64 %i.011, 3
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
%0 = load i64* %arrayidx17, align 8
-; CHECK: da analyze - flow [-3 -2]!
%incdec.ptr = getelementptr inbounds i64* %B.addr.24, i64 1
store i64 %0, i64* %B.addr.24, align 8
%inc19 = add nsw i64 %k9.05, 1
- %cmp11 = icmp slt i64 %inc19, %n
- br i1 %cmp11, label %for.body12, label %for.inc21
+ %exitcond = icmp ne i64 %inc19, %n
+ br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
-for.inc21: ; preds = %for.body12, %for.cond10.loopexit
- %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %incdec.ptr, %for.body12 ]
+for.inc21.loopexit: ; preds = %for.body12
+ %scevgep = getelementptr i64* %B.addr.18, i64 %n
+ br label %for.inc21
+
+for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+ %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
%inc22 = add nsw i64 %j.07, 1
- %cmp2 = icmp slt i64 %inc22, %n
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc24
+ %exitcond14 = icmp ne i64 %inc22, %n
+ br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit: ; preds = %for.inc21
+ %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
+ br label %for.inc24
-for.inc24: ; preds = %for.inc21, %for.cond1.preheader
- %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc21 ]
+for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
%inc25 = add nsw i64 %i.011, 1
- %cmp = icmp slt i64 %inc25, %n
- br i1 %cmp, label %for.cond1.preheader, label %for.end26
+ %exitcond15 = icmp ne i64 %inc25, %n
+ br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit: ; preds = %for.inc24
+ br label %for.end26
-for.end26: ; preds = %for.inc24, %entry
+for.end26: ; preds = %for.end26.loopexit, %entry
ret void
}
-;; classify subscripts
;; for (long int i = 0; i < n; i++)
;; for (long int j = 0; j < n; j++)
;; for (long int k = 0; k < n; k++)
@@ -118,83 +154,127 @@ for.end26: ; preds = %for.inc24, %entry
;; for (long int s = 0; s < n; s++)
;; for (long int u = 0; u < n; u++)
;; for (long int t = 0; t < n; t++) {
-;; A[i - 3] [j] [2] [k-1] [2*l + 1] [m] [p + q] [r + s] = ...
-;; ... = A[i + 3] [2] [u] [1-k] [3*l - 1] [o] [1 + n] [t + 2];
+;; A[i - 3] [j] [2] [k-1] [2*l + 1] [m] [p + q] [r + s] = i;
+;; *B++ = A[i + 3] [2] [u] [1-k] [3*l - 1] [o] [1 + n] [t + 2];
define void @p3(i64 %n, [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64* %B) nounwind uwtable ssp {
entry:
%cmp44 = icmp sgt i64 %n, 0
- br i1 %cmp44, label %for.cond1.preheader, label %for.end90
+ br i1 %cmp44, label %for.cond1.preheader.preheader, label %for.end90
+
+; CHECK: da analyze - output [0 0 0 0 0 S * * * * S S|<]!
+; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable!
+; CHECK: da analyze - split level = 3, iteration = 1!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 S 0 0 S 0 S S S S 0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * * * * * * * * * * *|<]!
-for.cond1.preheader: ; preds = %for.inc88, %entry
- %B.addr.046 = phi i64* [ %B.addr.1.lcssa, %for.inc88 ], [ %B, %entry ]
- %i.045 = phi i64 [ %inc89, %for.inc88 ], [ 0, %entry ]
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc88
+ %B.addr.046 = phi i64* [ %B.addr.1.lcssa, %for.inc88 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.045 = phi i64 [ %inc89, %for.inc88 ], [ 0, %for.cond1.preheader.preheader ]
%cmp240 = icmp sgt i64 %n, 0
- br i1 %cmp240, label %for.cond4.preheader, label %for.inc88
+ br i1 %cmp240, label %for.cond4.preheader.preheader, label %for.inc88
+
+for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc85, %for.cond1.preheader
- %B.addr.142 = phi i64* [ %B.addr.2.lcssa, %for.inc85 ], [ %B.addr.046, %for.cond1.preheader ]
- %j.041 = phi i64 [ %inc86, %for.inc85 ], [ 0, %for.cond1.preheader ]
+for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc85
+ %B.addr.142 = phi i64* [ %B.addr.2.lcssa, %for.inc85 ], [ %B.addr.046, %for.cond4.preheader.preheader ]
+ %j.041 = phi i64 [ %inc86, %for.inc85 ], [ 0, %for.cond4.preheader.preheader ]
%cmp536 = icmp sgt i64 %n, 0
- br i1 %cmp536, label %for.cond7.preheader, label %for.inc85
+ br i1 %cmp536, label %for.cond7.preheader.preheader, label %for.inc85
-for.cond7.preheader: ; preds = %for.inc82, %for.cond4.preheader
- %B.addr.238 = phi i64* [ %B.addr.3.lcssa, %for.inc82 ], [ %B.addr.142, %for.cond4.preheader ]
- %k.037 = phi i64 [ %inc83, %for.inc82 ], [ 0, %for.cond4.preheader ]
+for.cond7.preheader.preheader: ; preds = %for.cond4.preheader
+ br label %for.cond7.preheader
+
+for.cond7.preheader: ; preds = %for.cond7.preheader.preheader, %for.inc82
+ %B.addr.238 = phi i64* [ %B.addr.3.lcssa, %for.inc82 ], [ %B.addr.142, %for.cond7.preheader.preheader ]
+ %k.037 = phi i64 [ %inc83, %for.inc82 ], [ 0, %for.cond7.preheader.preheader ]
%cmp832 = icmp sgt i64 %n, 0
- br i1 %cmp832, label %for.cond10.preheader, label %for.inc82
+ br i1 %cmp832, label %for.cond10.preheader.preheader, label %for.inc82
+
+for.cond10.preheader.preheader: ; preds = %for.cond7.preheader
+ br label %for.cond10.preheader
-for.cond10.preheader: ; preds = %for.inc79, %for.cond7.preheader
- %B.addr.334 = phi i64* [ %B.addr.4.lcssa, %for.inc79 ], [ %B.addr.238, %for.cond7.preheader ]
- %l.033 = phi i64 [ %inc80, %for.inc79 ], [ 0, %for.cond7.preheader ]
+for.cond10.preheader: ; preds = %for.cond10.preheader.preheader, %for.inc79
+ %B.addr.334 = phi i64* [ %B.addr.4.lcssa, %for.inc79 ], [ %B.addr.238, %for.cond10.preheader.preheader ]
+ %l.033 = phi i64 [ %inc80, %for.inc79 ], [ 0, %for.cond10.preheader.preheader ]
%cmp1128 = icmp sgt i64 %n, 0
- br i1 %cmp1128, label %for.cond13.preheader, label %for.inc79
+ br i1 %cmp1128, label %for.cond13.preheader.preheader, label %for.inc79
+
+for.cond13.preheader.preheader: ; preds = %for.cond10.preheader
+ br label %for.cond13.preheader
-for.cond13.preheader: ; preds = %for.inc76, %for.cond10.preheader
- %B.addr.430 = phi i64* [ %B.addr.5.lcssa, %for.inc76 ], [ %B.addr.334, %for.cond10.preheader ]
- %m.029 = phi i64 [ %inc77, %for.inc76 ], [ 0, %for.cond10.preheader ]
+for.cond13.preheader: ; preds = %for.cond13.preheader.preheader, %for.inc76
+ %B.addr.430 = phi i64* [ %B.addr.5.lcssa, %for.inc76 ], [ %B.addr.334, %for.cond13.preheader.preheader ]
+ %m.029 = phi i64 [ %inc77, %for.inc76 ], [ 0, %for.cond13.preheader.preheader ]
%cmp1424 = icmp sgt i64 %n, 0
- br i1 %cmp1424, label %for.cond16.preheader, label %for.inc76
+ br i1 %cmp1424, label %for.cond16.preheader.preheader, label %for.inc76
-for.cond16.preheader: ; preds = %for.inc73, %for.cond13.preheader
- %B.addr.526 = phi i64* [ %B.addr.6.lcssa, %for.inc73 ], [ %B.addr.430, %for.cond13.preheader ]
- %o.025 = phi i64 [ %inc74, %for.inc73 ], [ 0, %for.cond13.preheader ]
+for.cond16.preheader.preheader: ; preds = %for.cond13.preheader
+ br label %for.cond16.preheader
+
+for.cond16.preheader: ; preds = %for.cond16.preheader.preheader, %for.inc73
+ %B.addr.526 = phi i64* [ %B.addr.6.lcssa, %for.inc73 ], [ %B.addr.430, %for.cond16.preheader.preheader ]
+ %o.025 = phi i64 [ %inc74, %for.inc73 ], [ 0, %for.cond16.preheader.preheader ]
%cmp1720 = icmp sgt i64 %n, 0
- br i1 %cmp1720, label %for.cond19.preheader, label %for.inc73
+ br i1 %cmp1720, label %for.cond19.preheader.preheader, label %for.inc73
+
+for.cond19.preheader.preheader: ; preds = %for.cond16.preheader
+ br label %for.cond19.preheader
-for.cond19.preheader: ; preds = %for.inc70, %for.cond16.preheader
- %B.addr.622 = phi i64* [ %B.addr.7.lcssa, %for.inc70 ], [ %B.addr.526, %for.cond16.preheader ]
- %p.021 = phi i64 [ %inc71, %for.inc70 ], [ 0, %for.cond16.preheader ]
+for.cond19.preheader: ; preds = %for.cond19.preheader.preheader, %for.inc70
+ %B.addr.622 = phi i64* [ %B.addr.7.lcssa, %for.inc70 ], [ %B.addr.526, %for.cond19.preheader.preheader ]
+ %p.021 = phi i64 [ %inc71, %for.inc70 ], [ 0, %for.cond19.preheader.preheader ]
%cmp2016 = icmp sgt i64 %n, 0
- br i1 %cmp2016, label %for.cond22.preheader, label %for.inc70
+ br i1 %cmp2016, label %for.cond22.preheader.preheader, label %for.inc70
-for.cond22.preheader: ; preds = %for.inc67, %for.cond19.preheader
- %B.addr.718 = phi i64* [ %B.addr.8.lcssa, %for.inc67 ], [ %B.addr.622, %for.cond19.preheader ]
- %q.017 = phi i64 [ %inc68, %for.inc67 ], [ 0, %for.cond19.preheader ]
+for.cond22.preheader.preheader: ; preds = %for.cond19.preheader
+ br label %for.cond22.preheader
+
+for.cond22.preheader: ; preds = %for.cond22.preheader.preheader, %for.inc67
+ %B.addr.718 = phi i64* [ %B.addr.8.lcssa, %for.inc67 ], [ %B.addr.622, %for.cond22.preheader.preheader ]
+ %q.017 = phi i64 [ %inc68, %for.inc67 ], [ 0, %for.cond22.preheader.preheader ]
%cmp2312 = icmp sgt i64 %n, 0
- br i1 %cmp2312, label %for.cond25.preheader, label %for.inc67
+ br i1 %cmp2312, label %for.cond25.preheader.preheader, label %for.inc67
+
+for.cond25.preheader.preheader: ; preds = %for.cond22.preheader
+ br label %for.cond25.preheader
-for.cond25.preheader: ; preds = %for.inc64, %for.cond22.preheader
- %B.addr.814 = phi i64* [ %B.addr.9.lcssa, %for.inc64 ], [ %B.addr.718, %for.cond22.preheader ]
- %r.013 = phi i64 [ %inc65, %for.inc64 ], [ 0, %for.cond22.preheader ]
+for.cond25.preheader: ; preds = %for.cond25.preheader.preheader, %for.inc64
+ %B.addr.814 = phi i64* [ %B.addr.9.lcssa, %for.inc64 ], [ %B.addr.718, %for.cond25.preheader.preheader ]
+ %r.013 = phi i64 [ %inc65, %for.inc64 ], [ 0, %for.cond25.preheader.preheader ]
%cmp268 = icmp sgt i64 %n, 0
- br i1 %cmp268, label %for.cond28.preheader, label %for.inc64
+ br i1 %cmp268, label %for.cond28.preheader.preheader, label %for.inc64
-for.cond28.preheader: ; preds = %for.inc61, %for.cond25.preheader
- %B.addr.910 = phi i64* [ %B.addr.10.lcssa, %for.inc61 ], [ %B.addr.814, %for.cond25.preheader ]
- %s.09 = phi i64 [ %inc62, %for.inc61 ], [ 0, %for.cond25.preheader ]
+for.cond28.preheader.preheader: ; preds = %for.cond25.preheader
+ br label %for.cond28.preheader
+
+for.cond28.preheader: ; preds = %for.cond28.preheader.preheader, %for.inc61
+ %B.addr.910 = phi i64* [ %B.addr.10.lcssa, %for.inc61 ], [ %B.addr.814, %for.cond28.preheader.preheader ]
+ %s.09 = phi i64 [ %inc62, %for.inc61 ], [ 0, %for.cond28.preheader.preheader ]
%cmp294 = icmp sgt i64 %n, 0
- br i1 %cmp294, label %for.cond31.preheader, label %for.inc61
+ br i1 %cmp294, label %for.cond31.preheader.preheader, label %for.inc61
+
+for.cond31.preheader.preheader: ; preds = %for.cond28.preheader
+ br label %for.cond31.preheader
-for.cond31.preheader: ; preds = %for.inc58, %for.cond28.preheader
- %u.06 = phi i64 [ %inc59, %for.inc58 ], [ 0, %for.cond28.preheader ]
- %B.addr.105 = phi i64* [ %B.addr.11.lcssa, %for.inc58 ], [ %B.addr.910, %for.cond28.preheader ]
+for.cond31.preheader: ; preds = %for.cond31.preheader.preheader, %for.inc58
+ %u.06 = phi i64 [ %inc59, %for.inc58 ], [ 0, %for.cond31.preheader.preheader ]
+ %B.addr.105 = phi i64* [ %B.addr.11.lcssa, %for.inc58 ], [ %B.addr.910, %for.cond31.preheader.preheader ]
%cmp321 = icmp sgt i64 %n, 0
- br i1 %cmp321, label %for.body33, label %for.inc58
+ br i1 %cmp321, label %for.body33.preheader, label %for.inc58
+
+for.body33.preheader: ; preds = %for.cond31.preheader
+ br label %for.body33
-for.body33: ; preds = %for.body33, %for.cond31.preheader
- %t.03 = phi i64 [ %inc, %for.body33 ], [ 0, %for.cond31.preheader ]
- %B.addr.112 = phi i64* [ %incdec.ptr, %for.body33 ], [ %B.addr.105, %for.cond31.preheader ]
+for.body33: ; preds = %for.body33.preheader, %for.body33
+ %t.03 = phi i64 [ %inc, %for.body33 ], [ 0, %for.body33.preheader ]
+ %B.addr.112 = phi i64* [ %incdec.ptr, %for.body33 ], [ %B.addr.105, %for.body33.preheader ]
%add = add nsw i64 %r.013, %s.09
%add34 = add nsw i64 %p.021, %q.017
%mul = shl nsw i64 %l.033, 1
@@ -211,99 +291,153 @@ for.body33: ; preds = %for.body33, %for.co
%add49 = add nsw i64 %i.045, 3
%arrayidx57 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %add49, i64 2, i64 %u.06, i64 %sub48, i64 %sub47, i64 %o.025, i64 %add45, i64 %add44
%0 = load i64* %arrayidx57, align 8
-; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable!
-; CHECK: da analyze - split level = 3, iteration = 1!
%incdec.ptr = getelementptr inbounds i64* %B.addr.112, i64 1
store i64 %0, i64* %B.addr.112, align 8
%inc = add nsw i64 %t.03, 1
- %cmp32 = icmp slt i64 %inc, %n
- br i1 %cmp32, label %for.body33, label %for.inc58
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body33, label %for.inc58.loopexit
-for.inc58: ; preds = %for.body33, %for.cond31.preheader
- %B.addr.11.lcssa = phi i64* [ %B.addr.105, %for.cond31.preheader ], [ %incdec.ptr, %for.body33 ]
+for.inc58.loopexit: ; preds = %for.body33
+ %scevgep = getelementptr i64* %B.addr.105, i64 %n
+ br label %for.inc58
+
+for.inc58: ; preds = %for.inc58.loopexit, %for.cond31.preheader
+ %B.addr.11.lcssa = phi i64* [ %B.addr.105, %for.cond31.preheader ], [ %scevgep, %for.inc58.loopexit ]
%inc59 = add nsw i64 %u.06, 1
- %cmp29 = icmp slt i64 %inc59, %n
- br i1 %cmp29, label %for.cond31.preheader, label %for.inc61
+ %exitcond48 = icmp ne i64 %inc59, %n
+ br i1 %exitcond48, label %for.cond31.preheader, label %for.inc61.loopexit
+
+for.inc61.loopexit: ; preds = %for.inc58
+ %B.addr.11.lcssa.lcssa = phi i64* [ %B.addr.11.lcssa, %for.inc58 ]
+ br label %for.inc61
-for.inc61: ; preds = %for.inc58, %for.cond28.preheader
- %B.addr.10.lcssa = phi i64* [ %B.addr.910, %for.cond28.preheader ], [ %B.addr.11.lcssa, %for.inc58 ]
+for.inc61: ; preds = %for.inc61.loopexit, %for.cond28.preheader
+ %B.addr.10.lcssa = phi i64* [ %B.addr.910, %for.cond28.preheader ], [ %B.addr.11.lcssa.lcssa, %for.inc61.loopexit ]
%inc62 = add nsw i64 %s.09, 1
- %cmp26 = icmp slt i64 %inc62, %n
- br i1 %cmp26, label %for.cond28.preheader, label %for.inc64
+ %exitcond49 = icmp ne i64 %inc62, %n
+ br i1 %exitcond49, label %for.cond28.preheader, label %for.inc64.loopexit
-for.inc64: ; preds = %for.inc61, %for.cond25.preheader
- %B.addr.9.lcssa = phi i64* [ %B.addr.814, %for.cond25.preheader ], [ %B.addr.10.lcssa, %for.inc61 ]
+for.inc64.loopexit: ; preds = %for.inc61
+ %B.addr.10.lcssa.lcssa = phi i64* [ %B.addr.10.lcssa, %for.inc61 ]
+ br label %for.inc64
+
+for.inc64: ; preds = %for.inc64.loopexit, %for.cond25.preheader
+ %B.addr.9.lcssa = phi i64* [ %B.addr.814, %for.cond25.preheader ], [ %B.addr.10.lcssa.lcssa, %for.inc64.loopexit ]
%inc65 = add nsw i64 %r.013, 1
- %cmp23 = icmp slt i64 %inc65, %n
- br i1 %cmp23, label %for.cond25.preheader, label %for.inc67
+ %exitcond50 = icmp ne i64 %inc65, %n
+ br i1 %exitcond50, label %for.cond25.preheader, label %for.inc67.loopexit
+
+for.inc67.loopexit: ; preds = %for.inc64
+ %B.addr.9.lcssa.lcssa = phi i64* [ %B.addr.9.lcssa, %for.inc64 ]
+ br label %for.inc67
-for.inc67: ; preds = %for.inc64, %for.cond22.preheader
- %B.addr.8.lcssa = phi i64* [ %B.addr.718, %for.cond22.preheader ], [ %B.addr.9.lcssa, %for.inc64 ]
+for.inc67: ; preds = %for.inc67.loopexit, %for.cond22.preheader
+ %B.addr.8.lcssa = phi i64* [ %B.addr.718, %for.cond22.preheader ], [ %B.addr.9.lcssa.lcssa, %for.inc67.loopexit ]
%inc68 = add nsw i64 %q.017, 1
- %cmp20 = icmp slt i64 %inc68, %n
- br i1 %cmp20, label %for.cond22.preheader, label %for.inc70
+ %exitcond51 = icmp ne i64 %inc68, %n
+ br i1 %exitcond51, label %for.cond22.preheader, label %for.inc70.loopexit
+
+for.inc70.loopexit: ; preds = %for.inc67
+ %B.addr.8.lcssa.lcssa = phi i64* [ %B.addr.8.lcssa, %for.inc67 ]
+ br label %for.inc70
-for.inc70: ; preds = %for.inc67, %for.cond19.preheader
- %B.addr.7.lcssa = phi i64* [ %B.addr.622, %for.cond19.preheader ], [ %B.addr.8.lcssa, %for.inc67 ]
+for.inc70: ; preds = %for.inc70.loopexit, %for.cond19.preheader
+ %B.addr.7.lcssa = phi i64* [ %B.addr.622, %for.cond19.preheader ], [ %B.addr.8.lcssa.lcssa, %for.inc70.loopexit ]
%inc71 = add nsw i64 %p.021, 1
- %cmp17 = icmp slt i64 %inc71, %n
- br i1 %cmp17, label %for.cond19.preheader, label %for.inc73
+ %exitcond52 = icmp ne i64 %inc71, %n
+ br i1 %exitcond52, label %for.cond19.preheader, label %for.inc73.loopexit
-for.inc73: ; preds = %for.inc70, %for.cond16.preheader
- %B.addr.6.lcssa = phi i64* [ %B.addr.526, %for.cond16.preheader ], [ %B.addr.7.lcssa, %for.inc70 ]
+for.inc73.loopexit: ; preds = %for.inc70
+ %B.addr.7.lcssa.lcssa = phi i64* [ %B.addr.7.lcssa, %for.inc70 ]
+ br label %for.inc73
+
+for.inc73: ; preds = %for.inc73.loopexit, %for.cond16.preheader
+ %B.addr.6.lcssa = phi i64* [ %B.addr.526, %for.cond16.preheader ], [ %B.addr.7.lcssa.lcssa, %for.inc73.loopexit ]
%inc74 = add nsw i64 %o.025, 1
- %cmp14 = icmp slt i64 %inc74, %n
- br i1 %cmp14, label %for.cond16.preheader, label %for.inc76
+ %exitcond53 = icmp ne i64 %inc74, %n
+ br i1 %exitcond53, label %for.cond16.preheader, label %for.inc76.loopexit
+
+for.inc76.loopexit: ; preds = %for.inc73
+ %B.addr.6.lcssa.lcssa = phi i64* [ %B.addr.6.lcssa, %for.inc73 ]
+ br label %for.inc76
-for.inc76: ; preds = %for.inc73, %for.cond13.preheader
- %B.addr.5.lcssa = phi i64* [ %B.addr.430, %for.cond13.preheader ], [ %B.addr.6.lcssa, %for.inc73 ]
+for.inc76: ; preds = %for.inc76.loopexit, %for.cond13.preheader
+ %B.addr.5.lcssa = phi i64* [ %B.addr.430, %for.cond13.preheader ], [ %B.addr.6.lcssa.lcssa, %for.inc76.loopexit ]
%inc77 = add nsw i64 %m.029, 1
- %cmp11 = icmp slt i64 %inc77, %n
- br i1 %cmp11, label %for.cond13.preheader, label %for.inc79
+ %exitcond54 = icmp ne i64 %inc77, %n
+ br i1 %exitcond54, label %for.cond13.preheader, label %for.inc79.loopexit
+
+for.inc79.loopexit: ; preds = %for.inc76
+ %B.addr.5.lcssa.lcssa = phi i64* [ %B.addr.5.lcssa, %for.inc76 ]
+ br label %for.inc79
-for.inc79: ; preds = %for.inc76, %for.cond10.preheader
- %B.addr.4.lcssa = phi i64* [ %B.addr.334, %for.cond10.preheader ], [ %B.addr.5.lcssa, %for.inc76 ]
+for.inc79: ; preds = %for.inc79.loopexit, %for.cond10.preheader
+ %B.addr.4.lcssa = phi i64* [ %B.addr.334, %for.cond10.preheader ], [ %B.addr.5.lcssa.lcssa, %for.inc79.loopexit ]
%inc80 = add nsw i64 %l.033, 1
- %cmp8 = icmp slt i64 %inc80, %n
- br i1 %cmp8, label %for.cond10.preheader, label %for.inc82
+ %exitcond55 = icmp ne i64 %inc80, %n
+ br i1 %exitcond55, label %for.cond10.preheader, label %for.inc82.loopexit
-for.inc82: ; preds = %for.inc79, %for.cond7.preheader
- %B.addr.3.lcssa = phi i64* [ %B.addr.238, %for.cond7.preheader ], [ %B.addr.4.lcssa, %for.inc79 ]
+for.inc82.loopexit: ; preds = %for.inc79
+ %B.addr.4.lcssa.lcssa = phi i64* [ %B.addr.4.lcssa, %for.inc79 ]
+ br label %for.inc82
+
+for.inc82: ; preds = %for.inc82.loopexit, %for.cond7.preheader
+ %B.addr.3.lcssa = phi i64* [ %B.addr.238, %for.cond7.preheader ], [ %B.addr.4.lcssa.lcssa, %for.inc82.loopexit ]
%inc83 = add nsw i64 %k.037, 1
- %cmp5 = icmp slt i64 %inc83, %n
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc85
+ %exitcond56 = icmp ne i64 %inc83, %n
+ br i1 %exitcond56, label %for.cond7.preheader, label %for.inc85.loopexit
+
+for.inc85.loopexit: ; preds = %for.inc82
+ %B.addr.3.lcssa.lcssa = phi i64* [ %B.addr.3.lcssa, %for.inc82 ]
+ br label %for.inc85
-for.inc85: ; preds = %for.inc82, %for.cond4.preheader
- %B.addr.2.lcssa = phi i64* [ %B.addr.142, %for.cond4.preheader ], [ %B.addr.3.lcssa, %for.inc82 ]
+for.inc85: ; preds = %for.inc85.loopexit, %for.cond4.preheader
+ %B.addr.2.lcssa = phi i64* [ %B.addr.142, %for.cond4.preheader ], [ %B.addr.3.lcssa.lcssa, %for.inc85.loopexit ]
%inc86 = add nsw i64 %j.041, 1
- %cmp2 = icmp slt i64 %inc86, %n
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc88
+ %exitcond57 = icmp ne i64 %inc86, %n
+ br i1 %exitcond57, label %for.cond4.preheader, label %for.inc88.loopexit
+
+for.inc88.loopexit: ; preds = %for.inc85
+ %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc85 ]
+ br label %for.inc88
-for.inc88: ; preds = %for.inc85, %for.cond1.preheader
- %B.addr.1.lcssa = phi i64* [ %B.addr.046, %for.cond1.preheader ], [ %B.addr.2.lcssa, %for.inc85 ]
+for.inc88: ; preds = %for.inc88.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i64* [ %B.addr.046, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc88.loopexit ]
%inc89 = add nsw i64 %i.045, 1
- %cmp = icmp slt i64 %inc89, %n
- br i1 %cmp, label %for.cond1.preheader, label %for.end90
+ %exitcond58 = icmp ne i64 %inc89, %n
+ br i1 %exitcond58, label %for.cond1.preheader, label %for.end90.loopexit
-for.end90: ; preds = %for.inc88, %entry
+for.end90.loopexit: ; preds = %for.inc88
+ br label %for.end90
+
+for.end90: ; preds = %for.end90.loopexit, %entry
ret void
}
-;; cleanup around chars, shorts, ints
-;;void p4(int *A, int *B, long int n)
-;; for (char i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;;void p4(int *A, int *B, long int n) {
+;; for (char i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @p4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i8 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - output [*|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i8 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv2 = sext i8 %i.03 to i32
%conv3 = sext i8 %i.03 to i64
%add = add i64 %conv3, 2
@@ -312,32 +446,44 @@ for.body: ; preds = %for.body, %entry
%idxprom4 = sext i8 %i.03 to i64
%arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i8 %i.03, 1
%conv = sext i8 %inc to i64
%cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ br i1 %cmp, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;;void p5(int *A, int *B, long int n)
-;; for (short i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;;void p5(int *A, int *B, long int n) {
+;; for (short i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @p5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - output [*|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i16 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv2 = sext i16 %i.03 to i32
%conv3 = sext i16 %i.03 to i64
%add = add i64 %conv3, 2
@@ -346,124 +492,208 @@ for.body: ; preds = %for.body, %entry
%idxprom4 = sext i16 %i.03 to i64
%arrayidx5 = getelementptr inbounds i32* %A, i64 %idxprom4
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i16 %i.03, 1
%conv = sext i16 %inc to i64
%cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ br i1 %cmp, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;;void p6(int *A, int *B, long int n)
-;; for (int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;;void p6(int *A, int *B, long int n) {
+;; for (int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @p6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
- %add = add nsw i32 %i.03, 2
- %idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- store i32 %i.03, i32* %arrayidx, align 4
- %idxprom2 = sext i32 %i.03 to i64
- %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
- %0 = load i32* %arrayidx3, align 4
+; CHECK: da analyze - consistent output [0|<]!
; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %0 = add nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
- %inc = add nsw i32 %i.03, 1
- %conv = sext i32 %inc to i64
- %cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ store i32 %2, i32* %B.addr.02, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;;void p7(unsigned *A, unsigned *B, char n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p7(unsigned *A, unsigned *B, char n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p7(i32* %A, i32* %B, i8 signext %n) nounwind uwtable ssp {
entry:
%idxprom = sext i8 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
store i32 0, i32* %arrayidx, align 4
%conv = sext i8 %n to i64
%add = add i64 %conv, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-
-;;void p8(unsigned *A, unsigned *B, short n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p8(unsigned *A, unsigned *B, short n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p8(i32* %A, i32* %B, i16 signext %n) nounwind uwtable ssp {
entry:
%idxprom = sext i16 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
%conv = sext i16 %n to i64
%add = add i64 %conv, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-;;void p9(unsigned *A, unsigned *B, int n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p9(unsigned *A, unsigned *B, int n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p9(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
%add = add nsw i32 %n, 1
%idxprom1 = sext i32 %add to i64
%arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-;;void p10(unsigned *A, unsigned *B, unsigned n)
-;; A[n] = ...
-;; ... = A[n + 1];
+;;void p10(unsigned *A, unsigned *B, unsigned n) {
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @p10(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
%idxprom = zext i32 %n to i64
%arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
%add = add i32 %n, 1
%idxprom1 = zext i32 %add to i64
%arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
+
+
+;;typedef struct { int v; } S;
+;;
+;;void f(S *s, unsigned size) {
+;; S *i = s, *e = s + size - 1;
+;; while (i != e) {
+;; *i = *(i + 1);
+;; ++i;
+
+%struct.S = type { i32 }
+
+define void @f(%struct.S* %s, i32 %size) nounwind uwtable ssp {
+entry:
+ %idx.ext = zext i32 %size to i64
+ %add.ptr.sum = add i64 %idx.ext, -1
+ %add.ptr1 = getelementptr inbounds %struct.S* %s, i64 %add.ptr.sum
+ %cmp1 = icmp eq i64 %add.ptr.sum, 0
+ br i1 %cmp1, label %while.end, label %while.body.preheader
+
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - consistent anti [1]!
+; CHECK: da analyze - consistent output [0|<]!
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %i.02 = phi %struct.S* [ %incdec.ptr, %while.body ], [ %s, %while.body.preheader ]
+ %0 = getelementptr inbounds %struct.S* %i.02, i64 1, i32 0
+ %1 = load i32* %0, align 4
+ %2 = getelementptr inbounds %struct.S* %i.02, i64 0, i32 0
+ store i32 %1, i32* %2, align 4
+ %incdec.ptr = getelementptr inbounds %struct.S* %i.02, i64 1
+ %cmp = icmp eq %struct.S* %incdec.ptr, %add.ptr1
+ br i1 %cmp, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Analysis/DependenceAnalysis/Propagating.ll b/test/Analysis/DependenceAnalysis/Propagating.ll
index 076348c68d..32d253593f 100644
--- a/test/Analysis/DependenceAnalysis/Propagating.ll
+++ b/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
+;; for (long int j = 0; j < 100; j++) {
;; A[i + 1][i + j] = i;
;; *B++ = A[i][i + j];
@@ -14,12 +14,19 @@ define void @prop0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc9, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - consistent flow [1 -1]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -30,17 +37,17 @@ for.body3: ; preds = %for.body3, %for.con
%add6 = add nsw i64 %i.03, %j.02
%arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - consistent flow [1 -1]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc9
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc9
for.inc9: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc10 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc10, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end11
+ %exitcond5 = icmp ne i64 %inc10, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end11
for.end11: ; preds = %for.inc9
ret void
@@ -49,25 +56,32 @@ for.end11: ; preds = %for.inc9
;; for (long int i = 0; i < 100; i++)
;; for (long int j = 0; j < 100; j++)
-;; for (long int k = 0; k < 100; k++)
-;; A[j - i][i + 1][j + k] = ...
-;; ... = A[j - i][i][j + k];
+;; for (long int k = 0; k < 100; k++) {
+;; A[j - i][i + 1][j + k] = i;
+;; *B++ = A[j - i][i][j + k];
define void @prop1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc18, %entry
- %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc18 ]
+; CHECK: da analyze - consistent output [0 0 0|<]!
+; CHECK: da analyze - consistent flow [1 1 -1]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= = =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc18
+ %B.addr.06 = phi i32* [ %B, %entry ], [ %scevgep7, %for.inc18 ]
%i.05 = phi i64 [ 0, %entry ], [ %inc19, %for.inc18 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc15, %for.cond1.preheader
- %B.addr.14 = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.inc15 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc15
+ %B.addr.14 = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %scevgep, %for.inc15 ]
%j.03 = phi i64 [ 0, %for.cond1.preheader ], [ %inc16, %for.inc15 ]
br label %for.body6
-for.body6: ; preds = %for.body6, %for.cond4.preheader
+for.body6: ; preds = %for.cond4.preheader, %for.body6
%k.02 = phi i64 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
%B.addr.21 = phi i32* [ %B.addr.14, %for.cond4.preheader ], [ %incdec.ptr, %for.body6 ]
%conv = trunc i64 %i.05 to i32
@@ -80,22 +94,23 @@ for.body6: ; preds = %for.body6, %for.con
%sub11 = sub nsw i64 %j.03, %i.05
%arrayidx14 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %sub11, i64 %i.05, i64 %add10
%0 = load i32* %arrayidx14, align 4
-; CHECK: da analyze - consistent flow [1 1 -1]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.21, i64 1
store i32 %0, i32* %B.addr.21, align 4
%inc = add nsw i64 %k.02, 1
- %cmp5 = icmp slt i64 %inc, 100
- br i1 %cmp5, label %for.body6, label %for.inc15
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body6, label %for.inc15
for.inc15: ; preds = %for.body6
+ %scevgep = getelementptr i32* %B.addr.14, i64 100
%inc16 = add nsw i64 %j.03, 1
- %cmp2 = icmp slt i64 %inc16, 100
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc18
+ %exitcond8 = icmp ne i64 %inc16, 100
+ br i1 %exitcond8, label %for.cond4.preheader, label %for.inc18
for.inc18: ; preds = %for.inc15
+ %scevgep7 = getelementptr i32* %B.addr.06, i64 10000
%inc19 = add nsw i64 %i.05, 1
- %cmp = icmp slt i64 %inc19, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end20
+ %exitcond9 = icmp ne i64 %inc19, 100
+ br i1 %exitcond9, label %for.cond1.preheader, label %for.end20
for.end20: ; preds = %for.inc18
ret void
@@ -103,20 +118,27 @@ for.end20: ; preds = %for.inc18
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i - 1][2*i] = ...
-;; ... = A[i][i + j + 110];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i - 1][2*i] = i;
+;; *B++ = A[i][i + j + 110];
define void @prop2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc8, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc8 ]
+; CHECK: da analyze - consistent output [0 S|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc8
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc9, %for.inc8 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -128,17 +150,17 @@ for.body3: ; preds = %for.body3, %for.con
%add5 = add nsw i64 %add, 110
%arrayidx7 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add5
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc8
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc8
for.inc8: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc9 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc9, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end10
+ %exitcond5 = icmp ne i64 %inc9, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end10
for.end10: ; preds = %for.inc8
ret void
@@ -146,20 +168,27 @@ for.end10: ; preds = %for.inc8
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i][2*j + i] = ...
-;; ... = A[i][2*j - i + 5];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i][2*j + i] = i;
+;; *B++ = A[i][2*j - i + 5];
define void @prop3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc9, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc9 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc9
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc10, %for.inc9 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -172,39 +201,45 @@ for.body3: ; preds = %for.body3, %for.con
%add6 = add nsw i64 %sub, 5
%arrayidx8 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add6
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc9
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc9
for.inc9: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc10 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc10, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end11
+ %exitcond5 = icmp ne i64 %inc10, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end11
for.end11: ; preds = %for.inc9
ret void
}
-;; propagate Distance
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i + 2][2*i + j + 1] = ...
-;; ... = A[i][2*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i + 2][2*i + j + 1] = i;
+;; *B++ = A[i][2*i + j];
define void @prop4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc11, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc11 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - consistent flow [2 -3]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc11
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc11 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc12, %for.inc11 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -218,39 +253,46 @@ for.body3: ; preds = %for.body3, %for.con
%add8 = add nsw i64 %mul7, %j.02
%arrayidx10 = getelementptr inbounds [100 x i32]* %A, i64 %i.03, i64 %add8
%0 = load i32* %arrayidx10, align 4
-; CHECK: da analyze - consistent flow [2 -3]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc11
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc11
for.inc11: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc12 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc12, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end13
+ %exitcond5 = icmp ne i64 %inc12, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end13
for.end13: ; preds = %for.inc11
ret void
}
-;; propagate Point
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[3*i - 18][22 - i][2*i + j] = ...
-;; ... = A[i][i][3*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[3*i - 18][22 - i][2*i + j] = i;
+;; *B++ = A[i][i][3*i + j];
define void @prop5([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc13, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc13 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - flow [< -16] splitable!
+; CHECK: da analyze - split level = 1, iteration = 11!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc13
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc13 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -265,40 +307,45 @@ for.body3: ; preds = %for.body3, %for.con
%add9 = add nsw i64 %mul8, %j.02
%arrayidx12 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 %i.03, i64 %i.03, i64 %add9
%0 = load i32* %arrayidx12, align 4
-; CHECK: da analyze - flow [< -16] splitable!
-; CHECK: da analyze - split level = 1, iteration = 11!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc13
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc13
for.inc13: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc14 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc14, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end15
+ %exitcond5 = icmp ne i64 %inc14, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end15
for.end15: ; preds = %for.inc13
ret void
}
-;; propagate Line
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[i + 1][4*i + j + 2] = ...
-;; ... = A[2*i][8*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[i + 1][4*i + j + 2] = i;
+;; *B++ = A[2*i][8*i + j];
define void @prop6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc12, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc12 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - flow [=> -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc12
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc12 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc13, %for.inc12 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -313,17 +360,17 @@ for.body3: ; preds = %for.body3, %for.con
%mul9 = shl nsw i64 %i.03, 1
%arrayidx11 = getelementptr inbounds [100 x i32]* %A, i64 %mul9, i64 %add8
%0 = load i32* %arrayidx11, align 4
-; CHECK: da analyze - flow [=> -2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc12
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc12
for.inc12: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc13 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc13, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end14
+ %exitcond5 = icmp ne i64 %inc13, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end14
for.end14: ; preds = %for.inc12
ret void
@@ -331,20 +378,28 @@ for.end14: ; preds = %for.inc12
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i + 4][-5*i + j + 2] = ...
-;; ... = A[-2*i + 20][5*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i + 4][-5*i + j + 2] = i;
+;; *B++ = A[-2*i + 20][5*i + j];
define void @prop7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc14, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc14 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - flow [* -38] splitable!
+; CHECK: da analyze - split level = 1, iteration = 4!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc14
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc14 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc15, %for.inc14 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -361,18 +416,17 @@ for.body3: ; preds = %for.body3, %for.con
%add11 = add nsw i64 %mul10, 20
%arrayidx13 = getelementptr inbounds [100 x i32]* %A, i64 %add11, i64 %add9
%0 = load i32* %arrayidx13, align 4
-; CHECK: da analyze - flow [* -38] splitable!
-; CHECK: da analyze - split level = 1, iteration = 4!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc14
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc14
for.inc14: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc15 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc15, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end16
+ %exitcond5 = icmp ne i64 %inc15, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end16
for.end16: ; preds = %for.inc14
ret void
@@ -380,20 +434,27 @@ for.end16: ; preds = %for.inc14
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[4][j + 2] = ...
-;; ... = A[-2*i + 4][5*i + j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[4][j + 2] = i;
+;; *B++ = A[-2*i + 4][5*i + j];
define void @prop8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc10, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ]
+; CHECK: da analyze - consistent output [S 0|<]!
+; CHECK: da analyze - flow [p<= 2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc10
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc10 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -406,17 +467,17 @@ for.body3: ; preds = %for.body3, %for.con
%add7 = add nsw i64 %mul6, 4
%arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 %add7, i64 %add5
%0 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - flow [p<= 2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc10
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc10
for.inc10: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc11 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc11, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end12
+ %exitcond5 = icmp ne i64 %inc11, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end12
for.end12: ; preds = %for.inc10
ret void
@@ -424,20 +485,27 @@ for.end12: ; preds = %for.inc10
;; for (long int i = 0; i < 100; i++)
-;; for (long int j = 0; j < 100; j++)
-;; A[2*i + 4][5*i + j + 2] = ...
-;; ... = A[4][j];
+;; for (long int j = 0; j < 100; j++) {
+;; A[2*i + 4][5*i + j + 2] = i;
+;; *B++ = A[4][j];
define void @prop9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc10, %entry
- %B.addr.04 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc10 ]
+; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - flow [p<= 2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc10
+ %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc10 ]
%i.03 = phi i64 [ 0, %entry ], [ %inc11, %for.inc10 ]
br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
+for.body3: ; preds = %for.cond1.preheader, %for.body3
%j.02 = phi i64 [ 0, %for.cond1.preheader ], [ %inc, %for.body3 ]
%B.addr.11 = phi i32* [ %B.addr.04, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
%conv = trunc i64 %i.03 to i32
@@ -450,17 +518,17 @@ for.body3: ; preds = %for.body3, %for.con
store i32 %conv, i32* %arrayidx7, align 4
%arrayidx9 = getelementptr inbounds [100 x i32]* %A, i64 4, i64 %j.02
%0 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - flow [p<= 2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.11, i64 1
store i32 %0, i32* %B.addr.11, align 4
%inc = add nsw i64 %j.02, 1
- %cmp2 = icmp slt i64 %inc, 100
- br i1 %cmp2, label %for.body3, label %for.inc10
+ %exitcond = icmp ne i64 %inc, 100
+ br i1 %exitcond, label %for.body3, label %for.inc10
for.inc10: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.04, i64 100
%inc11 = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc11, 100
- br i1 %cmp, label %for.cond1.preheader, label %for.end12
+ %exitcond5 = icmp ne i64 %inc11, 100
+ br i1 %exitcond5, label %for.cond1.preheader, label %for.end12
for.end12: ; preds = %for.inc10
ret void
diff --git a/test/Analysis/DependenceAnalysis/Separability.ll b/test/Analysis/DependenceAnalysis/Separability.ll
index d42d3cdb39..beda448e83 100644
--- a/test/Analysis/DependenceAnalysis/Separability.ll
+++ b/test/Analysis/DependenceAnalysis/Separability.ll
@@ -8,30 +8,37 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[n][i][j + k] = ...
-;; ... = A[10][i + 10][2*j - l];
+;; for (long int l = 0; l < 50; l++) {
+;; A[n][i][j + k] = i;
+;; *B++ = A[10][i + 10][2*j - l];
define void @sep0([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc22, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ]
+; CHECK: da analyze - output [0 * * S|<]!
+; CHECK: da analyze - flow [-10 * * *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * S *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= = = =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc22
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc22 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc19
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc19 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc16
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc16 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -44,27 +51,29 @@ for.body9: ; preds = %for.body9, %for.con
%add12 = add nsw i64 %i.07, 10
%arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
%0 = load i32* %arrayidx15, align 4
-; CHECK: da analyze - flow [-10 * * *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc16
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc16
for.inc16: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc17 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc17, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc19
+ %exitcond10 = icmp ne i64 %inc17, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc19
for.inc19: ; preds = %for.inc16
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc20 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc20, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc22
+ %exitcond12 = icmp ne i64 %inc20, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc22
for.inc22: ; preds = %for.inc19
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc23 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc23, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end24
+ %exitcond13 = icmp ne i64 %inc23, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end24
for.end24: ; preds = %for.inc22
ret void
@@ -74,30 +83,37 @@ for.end24: ; preds = %for.inc22
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[i][i][j + k] = ...
-;; ... = A[10][i + 10][2*j - l];
+;; for (long int l = 0; l < 50; l++) {
+;; A[i][i][j + k] = i;
+;; *B++ = A[10][i + 10][2*j - l];
define void @sep1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc22, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc22 ]
+; CHECK: da analyze - output [0 * * S|<]!
+; CHECK: da analyze - flow [> * * *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * S *|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= = = =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc22
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc22 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc23, %for.inc22 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc19, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc19 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc19
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc19 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc20, %for.inc19 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc16, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc16 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc16
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc16 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc17, %for.inc16 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -109,27 +125,29 @@ for.body9: ; preds = %for.body9, %for.con
%add12 = add nsw i64 %i.07, 10
%arrayidx15 = getelementptr inbounds [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
%0 = load i32* %arrayidx15, align 4
-; CHECK: da analyze - flow [> * * *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc16
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc16
for.inc16: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc17 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc17, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc19
+ %exitcond10 = icmp ne i64 %inc17, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc19
for.inc19: ; preds = %for.inc16
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc20 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc20, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc22
+ %exitcond12 = icmp ne i64 %inc20, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc22
for.inc22: ; preds = %for.inc19
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc23 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc23, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end24
+ %exitcond13 = icmp ne i64 %inc23, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end24
for.end24: ; preds = %for.inc22
ret void
@@ -139,30 +157,37 @@ for.end24: ; preds = %for.inc22
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[i][i][i + k][l] = ...
-;; ... = A[10][i + 10][j + k][l + 10];
+;; for (long int l = 0; l < 50; l++) {
+;; A[i][i][i + k][l] = i;
+;; *B++ = A[10][i + 10][j + k][l + 10];
define void @sep2([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc26, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc26 ]
+; CHECK: da analyze - consistent output [0 S 0 0|<]!
+; CHECK: da analyze - flow [> * * -10]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * * 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= = = =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc26
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc26 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc27, %for.inc26 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc23, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc23 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc23
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc23 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc24, %for.inc23 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc20, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc20 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc20
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc20 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc21, %for.inc20 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -174,27 +199,29 @@ for.body9: ; preds = %for.body9, %for.con
%add15 = add nsw i64 %i.07, 10
%arrayidx19 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add15, i64 %add14, i64 %add13
%0 = load i32* %arrayidx19, align 4
-; CHECK: da analyze - flow [> * * -10]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc20
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc20
for.inc20: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc21 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc21, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc23
+ %exitcond10 = icmp ne i64 %inc21, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc23
for.inc23: ; preds = %for.inc20
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc24 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc24, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc26
+ %exitcond12 = icmp ne i64 %inc24, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc26
for.inc26: ; preds = %for.inc23
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc27 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc27, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end28
+ %exitcond13 = icmp ne i64 %inc27, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end28
for.end28: ; preds = %for.inc26
ret void
@@ -204,30 +231,37 @@ for.end28: ; preds = %for.inc26
;; for (long int i = 0; i < 50; i++)
;; for (long int j = 0; j < 50; j++)
;; for (long int k = 0; k < 50; k++)
-;; for (long int l = 0; l < 50; l++)
-;; A[i][i][i + k][l + k] = ...
-;; ... = A[10][i + 10][j + k][l + 10];
+;; for (long int l = 0; l < 50; l++) {
+;; A[i][i][i + k][l + k] = i;
+;; *B++ = A[10][i + 10][j + k][l + 10];
define void @sep3([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc27, %entry
- %B.addr.08 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.inc27 ]
+; CHECK: da analyze - consistent output [0 S 0 0|<]!
+; CHECK: da analyze - flow [> * * *]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [0 * * 0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [= = = =|<]!
+
+for.cond1.preheader: ; preds = %entry, %for.inc27
+ %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc27 ]
%i.07 = phi i64 [ 0, %entry ], [ %inc28, %for.inc27 ]
br label %for.cond4.preheader
-for.cond4.preheader: ; preds = %for.inc24, %for.cond1.preheader
- %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %incdec.ptr, %for.inc24 ]
+for.cond4.preheader: ; preds = %for.cond1.preheader, %for.inc24
+ %B.addr.16 = phi i32* [ %B.addr.08, %for.cond1.preheader ], [ %scevgep9, %for.inc24 ]
%j.05 = phi i64 [ 0, %for.cond1.preheader ], [ %inc25, %for.inc24 ]
br label %for.cond7.preheader
-for.cond7.preheader: ; preds = %for.inc21, %for.cond4.preheader
- %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %incdec.ptr, %for.inc21 ]
+for.cond7.preheader: ; preds = %for.cond4.preheader, %for.inc21
+ %B.addr.24 = phi i32* [ %B.addr.16, %for.cond4.preheader ], [ %scevgep, %for.inc21 ]
%k.03 = phi i64 [ 0, %for.cond4.preheader ], [ %inc22, %for.inc21 ]
br label %for.body9
-for.body9: ; preds = %for.body9, %for.cond7.preheader
+for.body9: ; preds = %for.cond7.preheader, %for.body9
%l.02 = phi i64 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
%B.addr.31 = phi i32* [ %B.addr.24, %for.cond7.preheader ], [ %incdec.ptr, %for.body9 ]
%conv = trunc i64 %i.07 to i32
@@ -240,27 +274,29 @@ for.body9: ; preds = %for.body9, %for.con
%add16 = add nsw i64 %i.07, 10
%arrayidx20 = getelementptr inbounds [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add16, i64 %add15, i64 %add14
%0 = load i32* %arrayidx20, align 4
-; CHECK: da analyze - flow [> * * *]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.31, i64 1
store i32 %0, i32* %B.addr.31, align 4
%inc = add nsw i64 %l.02, 1
- %cmp8 = icmp slt i64 %inc, 50
- br i1 %cmp8, label %for.body9, label %for.inc21
+ %exitcond = icmp ne i64 %inc, 50
+ br i1 %exitcond, label %for.body9, label %for.inc21
for.inc21: ; preds = %for.body9
+ %scevgep = getelementptr i32* %B.addr.24, i64 50
%inc22 = add nsw i64 %k.03, 1
- %cmp5 = icmp slt i64 %inc22, 50
- br i1 %cmp5, label %for.cond7.preheader, label %for.inc24
+ %exitcond10 = icmp ne i64 %inc22, 50
+ br i1 %exitcond10, label %for.cond7.preheader, label %for.inc24
for.inc24: ; preds = %for.inc21
+ %scevgep9 = getelementptr i32* %B.addr.16, i64 2500
%inc25 = add nsw i64 %j.05, 1
- %cmp2 = icmp slt i64 %inc25, 50
- br i1 %cmp2, label %for.cond4.preheader, label %for.inc27
+ %exitcond12 = icmp ne i64 %inc25, 50
+ br i1 %exitcond12, label %for.cond4.preheader, label %for.inc27
for.inc27: ; preds = %for.inc24
+ %scevgep11 = getelementptr i32* %B.addr.08, i64 125000
%inc28 = add nsw i64 %i.07, 1
- %cmp = icmp slt i64 %inc28, 50
- br i1 %cmp, label %for.cond1.preheader, label %for.end29
+ %exitcond13 = icmp ne i64 %inc28, 50
+ br i1 %exitcond13, label %for.cond1.preheader, label %for.end29
for.end29: ; preds = %for.inc27
ret void
diff --git a/test/Analysis/DependenceAnalysis/StrongSIV.ll b/test/Analysis/DependenceAnalysis/StrongSIV.ll
index be336c3580..1cf00ad9c1 100644
--- a/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -1,143 +1,196 @@
-; RUN: opt < %s -analyze -basicaa -indvars -da | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s
; ModuleID = 'StrongSIV.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.6.0"
-;; for (int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i64 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
-
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
- %add = add nsw i32 %i.03, 2
- %idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- store i32 %i.03, i32* %arrayidx, align 4
- %idxprom2 = sext i32 %i.03 to i64
- %arrayidx3 = getelementptr inbounds i32* %A, i64 %idxprom2
- %0 = load i32* %arrayidx3, align 4
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+; CHECK: da analyze - consistent output [0|<]!
; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %0 = add nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
- %inc = add nsw i32 %i.03, 1
- %conv = sext i32 %inc to i64
- %cmp = icmp slt i64 %conv, %n
- br i1 %cmp, label %for.body, label %for.end
+ store i32 %2, i32* %B.addr.02, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (long int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong1(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
- %conv = sext i32 %n to i64
%cmp1 = icmp sgt i32 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
+ br i1 %cmp1, label %for.body.preheader, label %for.end
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ %0 = sext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv2 = trunc i64 %i.03 to i32
%add = add nsw i64 %i.03, 2
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv2, i32* %arrayidx, align 4
%arrayidx3 = getelementptr inbounds i32* %A, i64 %i.03
- %0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - consistent flow [2]!
+ %1 = load i32* %arrayidx3, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
+ store i32 %1, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp slt i64 %inc, %conv
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %0
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, 2
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.03
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - consistent flow [2]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (int i = 0; i < n; i++)
-;; A[i + 2] = ...
-;; ... = A[i];
+;; for (int i = 0; i < n; i++) {
+;; A[i + 2] = i;
+;; *B++ = A[i];
define void @strong3(i32* %A, i32* %B, i32 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp sgt i32 %n, 0
- br i1 %cmp1, label %for.body, label %for.end
-
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
- %add = add nsw i32 %i.03, 2
- %idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
- store i32 %i.03, i32* %arrayidx, align 4
- %idxprom1 = sext i32 %i.03 to i64
- %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
- %0 = load i32* %arrayidx2, align 4
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+; CHECK: da analyze - consistent output [0|<]!
; CHECK: da analyze - consistent flow [2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
+ %0 = add nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ %1 = trunc i64 %indvars.iv to i32
+ store i32 %1, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %2 = load i32* %arrayidx2, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
- store i32 %0, i32* %B.addr.02, align 4
- %inc = add nsw i32 %i.03, 1
- %cmp = icmp slt i32 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ store i32 %2, i32* %B.addr.02, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 19; i++)
-;; A[i + 19] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < 19; i++) {
+;; A[i + 19] = i;
+;; *B++ = A[i];
define void @strong4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -146,27 +199,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 19
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 19
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[i + 19] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[i + 19] = i;
+;; *B++ = A[i];
define void @strong5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [19]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -175,27 +234,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - consistent flow [19]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[2*i + 6] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[2*i + 6] = i;
+;; *B++ = A[2*i];
define void @strong6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [3]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -206,27 +271,33 @@ for.body: ; preds = %for.body, %entry
%mul1 = shl i64 %i.02, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - consistent flow [3]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[2*i + 7] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[2*i + 7] = i;
+;; *B++ = A[2*i];
define void @strong7(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -237,27 +308,33 @@ for.body: ; preds = %for.body, %entry
%mul1 = shl i64 %i.02, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %mul1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 20; i++)
-;; A[i + n] = ...
-;; ... = A[i];
+;; for (long unsigned i = 0; i < 20; i++) {
+;; A[i + n] = i;
+;; *B++ = A[i];
define void @strong8(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [%n|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -266,30 +343,39 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 %i.02
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - consistent flow [%n|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 20
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 20
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[i + n] = ...
-;; ... = A[i + 2*n];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[i + n] = i;
+;; *B++ = A[i + 2*n];
define void @strong9(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, %n
%arrayidx = getelementptr inbounds i32* %A, i64 %add
@@ -298,27 +384,36 @@ for.body: ; preds = %for.body, %entry
%add1 = add i64 %i.03, %mul
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 1000; i++)
-;; A[n*i + 5] = ...
-;; ... = A[n*i + 5];
+;; for (long unsigned i = 0; i < 1000; i++) {
+;; A[n*i + 5] = i;
+;; *B++ = A[n*i + 5];
define void @strong10(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - consistent flow [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -330,12 +425,11 @@ for.body: ; preds = %for.body, %entry
%add2 = add i64 %mul1, 5
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add2
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - consistent flow [0|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 1000
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 1000
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
diff --git a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index 2a1b4e7e97..5565f64811 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -6,65 +6,99 @@ target triple = "x86_64-apple-macosx10.6.0"
;; for (long int i = 0; i < n1; i++)
-;; A[2*i + n1] = ...
+;; A[2*i + n1] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[3*j + 3*n1];
+;; *B++ = A[3*j + 3*n1];
define void @symbolicrdiv0(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
-for.cond1.preheader: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end11, label %for.body4
+ br i1 %cmp21, label %for.end11, label %for.body4.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl nsw i64 %i.05, 1
%add = add i64 %mul, %n1
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc10, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc10, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul56 = add i64 %j.03, %n1
%add7 = mul i64 %mul56, 3
%arrayidx8 = getelementptr inbounds i32* %A, i64 %add7
%0 = load i32* %arrayidx8, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc10 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc10, %n2
- br i1 %cmp2, label %for.body4, label %for.end11
+ %exitcond7 = icmp ne i64 %inc10, %n2
+ br i1 %exitcond7, label %for.body4, label %for.end11.loopexit
+
+for.end11.loopexit: ; preds = %for.body4
+ br label %for.end11
-for.end11: ; preds = %for.body4, %for.cond1.preheader
+for.end11: ; preds = %for.end11.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[2*i + 5*n2] = ...
+;; A[2*i + 5*n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[3*j + 2*n2];
+;; *B++ = A[3*j + 2*n2];
define void @symbolicrdiv1(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond2.preheader, label %for.body
+ br i1 %cmp4, label %for.cond2.preheader, label %for.body.preheader
-for.cond2.preheader: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond2.preheader.loopexit: ; preds = %for.body
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.cond2.preheader.loopexit, %entry
%cmp31 = icmp eq i64 %n2, 0
- br i1 %cmp31, label %for.end12, label %for.body5
+ br i1 %cmp31, label %for.end12, label %for.body5.preheader
+
+for.body5.preheader: ; preds = %for.cond2.preheader
+ br label %for.body5
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl nsw i64 %i.05, 1
%mul1 = mul i64 %n2, 5
@@ -72,220 +106,307 @@ for.body: ; preds = %for.body, %entry
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond2.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond2.preheader.loopexit
-for.body5: ; preds = %for.body5, %for.cond2.preheader
- %j.03 = phi i64 [ %inc11, %for.body5 ], [ 0, %for.cond2.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body5 ], [ %B, %for.cond2.preheader ]
+for.body5: ; preds = %for.body5.preheader, %for.body5
+ %j.03 = phi i64 [ %inc11, %for.body5 ], [ 0, %for.body5.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body5 ], [ %B, %for.body5.preheader ]
%mul6 = mul nsw i64 %j.03, 3
%mul7 = shl i64 %n2, 1
%add8 = add i64 %mul6, %mul7
%arrayidx9 = getelementptr inbounds i32* %A, i64 %add8
%0 = load i32* %arrayidx9, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc11 = add nsw i64 %j.03, 1
- %cmp3 = icmp ult i64 %inc11, %n2
- br i1 %cmp3, label %for.body5, label %for.end12
+ %exitcond6 = icmp ne i64 %inc11, %n2
+ br i1 %exitcond6, label %for.body5, label %for.end12.loopexit
-for.end12: ; preds = %for.body5, %for.cond2.preheader
+for.end12.loopexit: ; preds = %for.body5
+ br label %for.end12
+
+for.end12: ; preds = %for.end12.loopexit, %for.cond2.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[2*i - n2] = ...
+;; A[2*i - n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[-j + 2*n1];
+;; *B++ = A[-j + 2*n1];
define void @symbolicrdiv2(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
-for.cond1.preheader: ; preds = %for.body, %entry
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end10, label %for.body4
+ br i1 %cmp21, label %for.end10, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl nsw i64 %i.05, 1
%sub = sub i64 %mul, %n2
%arrayidx = getelementptr inbounds i32* %A, i64 %sub
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul6 = shl i64 %n1, 1
%add = sub i64 %mul6, %j.03
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc9 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc9, %n2
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond6 = icmp ne i64 %inc9, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end10.loopexit
+
+for.end10.loopexit: ; preds = %for.body4
+ br label %for.end10
-for.end10: ; preds = %for.body4, %for.cond1.preheader
+for.end10: ; preds = %for.end10.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[-i + n2] = ...
+;; A[-i + n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[j - n1];
+;; *B++ = A[j - n1];
define void @symbolicrdiv3(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.body, %entry
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end9, label %for.body4
+ br i1 %cmp21, label %for.end9, label %for.body4.preheader
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%add = sub i64 %n2, %i.05
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc8, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc8, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%sub5 = sub i64 %j.03, %n1
%arrayidx6 = getelementptr inbounds i32* %A, i64 %sub5
%0 = load i32* %arrayidx6, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc8 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc8, %n2
- br i1 %cmp2, label %for.body4, label %for.end9
+ %exitcond6 = icmp ne i64 %inc8, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end9.loopexit
+
+for.end9.loopexit: ; preds = %for.body4
+ br label %for.end9
-for.end9: ; preds = %for.body4, %for.cond1.preheader
+for.end9: ; preds = %for.end9.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[-i + 2*n1] = ...
+;; A[-i + 2*n1] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[-j + n1];
+;; *B++ = A[-j + n1];
define void @symbolicrdiv4(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.cond1.preheader: ; preds = %for.body, %entry
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end10, label %for.body4
+ br i1 %cmp21, label %for.end10, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%mul = shl i64 %n1, 1
%add = sub i64 %mul, %i.05
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%add6 = sub i64 %n1, %j.03
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc9 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc9, %n2
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond6 = icmp ne i64 %inc9, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end10.loopexit
-for.end10: ; preds = %for.body4, %for.cond1.preheader
+for.end10.loopexit: ; preds = %for.body4
+ br label %for.end10
+
+for.end10: ; preds = %for.end10.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; A[-i + n2] = ...
+;; A[-i + n2] = i;
;; for (long int j = 0; j < n2; j++)
-;; ... = A[-j + 2*n2];
+;; *B++ = A[-j + 2*n2];
define void @symbolicrdiv5(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.cond1.preheader, label %for.body
+ br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
-for.cond1.preheader: ; preds = %for.body, %entry
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond1.preheader.loopexit: ; preds = %for.body
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.loopexit, %entry
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.end10, label %for.body4
+ br i1 %cmp21, label %for.end10, label %for.body4.preheader
+
+for.body4.preheader: ; preds = %for.cond1.preheader
+ br label %for.body4
-for.body: ; preds = %for.body, %entry
- %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.05 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%conv = trunc i64 %i.05 to i32
%add = sub i64 %n2, %i.05
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 %conv, i32* %arrayidx, align 4
%inc = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc, %n1
- br i1 %cmp, label %for.body, label %for.cond1.preheader
+ %exitcond = icmp ne i64 %inc, %n1
+ br i1 %exitcond, label %for.body, label %for.cond1.preheader.loopexit
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.cond1.preheader ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.cond1.preheader ]
+for.body4: ; preds = %for.body4.preheader, %for.body4
+ %j.03 = phi i64 [ %inc9, %for.body4 ], [ 0, %for.body4.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
%mul = shl i64 %n2, 1
%add6 = sub i64 %mul, %j.03
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
%0 = load i32* %arrayidx7, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc9 = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc9, %n2
- br i1 %cmp2, label %for.body4, label %for.end10
+ %exitcond6 = icmp ne i64 %inc9, %n2
+ br i1 %exitcond6, label %for.body4, label %for.end10.loopexit
+
+for.end10.loopexit: ; preds = %for.body4
+ br label %for.end10
-for.end10: ; preds = %for.body4, %for.cond1.preheader
+for.end10: ; preds = %for.end10.loopexit, %for.cond1.preheader
ret void
}
;; for (long int i = 0; i < n1; i++)
-;; for (long int j = 0; j < n2; j++)
-;; A[j -i + n2] = ...
-;; ... = A[2*n2];
+;; for (long int j = 0; j < n2; j++) {
+;; A[j -i + n2] = i;
+;; *B++ = A[2*n2];
define void @symbolicrdiv6(i32* %A, i32* %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
entry:
%cmp4 = icmp eq i64 %n1, 0
- br i1 %cmp4, label %for.end7, label %for.cond1.preheader
+ br i1 %cmp4, label %for.end7, label %for.cond1.preheader.preheader
+
+; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* *|<]!
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
-for.cond1.preheader: ; preds = %for.inc5, %entry
- %B.addr.06 = phi i32* [ %B.addr.1.lcssa, %for.inc5 ], [ %B, %entry ]
- %i.05 = phi i64 [ %inc6, %for.inc5 ], [ 0, %entry ]
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc5
+ %B.addr.06 = phi i32* [ %B.addr.1.lcssa, %for.inc5 ], [ %B, %for.cond1.preheader.preheader ]
+ %i.05 = phi i64 [ %inc6, %for.inc5 ], [ 0, %for.cond1.preheader.preheader ]
%cmp21 = icmp eq i64 %n2, 0
- br i1 %cmp21, label %for.inc5, label %for.body3
+ br i1 %cmp21, label %for.inc5, label %for.body3.preheader
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
-for.body3: ; preds = %for.body3, %for.cond1.preheader
- %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.cond1.preheader ]
- %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.cond1.preheader ]
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %j.03 = phi i64 [ %inc, %for.body3 ], [ 0, %for.body3.preheader ]
+ %B.addr.12 = phi i32* [ %incdec.ptr, %for.body3 ], [ %B.addr.06, %for.body3.preheader ]
%conv = trunc i64 %i.05 to i32
%sub = sub nsw i64 %j.03, %i.05
%add = add i64 %sub, %n2
@@ -294,19 +415,25 @@ for.body3: ; preds = %for.body3, %for.con
%mul = shl i64 %n2, 1
%arrayidx4 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.12, i64 1
store i32 %0, i32* %B.addr.12, align 4
%inc = add nsw i64 %j.03, 1
- %cmp2 = icmp ult i64 %inc, %n2
- br i1 %cmp2, label %for.body3, label %for.inc5
+ %exitcond = icmp ne i64 %inc, %n2
+ br i1 %exitcond, label %for.body3, label %for.inc5.loopexit
+
+for.inc5.loopexit: ; preds = %for.body3
+ %scevgep = getelementptr i32* %B.addr.06, i64 %n2
+ br label %for.inc5
-for.inc5: ; preds = %for.body3, %for.cond1.preheader
- %B.addr.1.lcssa = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %incdec.ptr, %for.body3 ]
+for.inc5: ; preds = %for.inc5.loopexit, %for.cond1.preheader
+ %B.addr.1.lcssa = phi i32* [ %B.addr.06, %for.cond1.preheader ], [ %scevgep, %for.inc5.loopexit ]
%inc6 = add nsw i64 %i.05, 1
- %cmp = icmp ult i64 %inc6, %n1
- br i1 %cmp, label %for.cond1.preheader, label %for.end7
+ %exitcond7 = icmp ne i64 %inc6, %n1
+ br i1 %exitcond7, label %for.cond1.preheader, label %for.end7.loopexit
+
+for.end7.loopexit: ; preds = %for.inc5
+ br label %for.end7
-for.end7: ; preds = %for.inc5, %entry
+for.end7: ; preds = %for.end7.loopexit, %entry
ret void
}
diff --git a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index ee2343fa51..074cc56e9b 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -5,18 +5,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long int i = 0; i < n; i++)
-;; A[2*i + n] = ...
-;; ... = A[3*i + 3*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*i + n] = i;
+;; *B++ = A[3*i + 3*n];
define void @symbolicsiv0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 1
%add = add i64 %mul, %n
@@ -26,30 +36,42 @@ for.body: ; preds = %for.body, %entry
%add3 = mul i64 %mul14, 3
%arrayidx4 = getelementptr inbounds i32* %A, i64 %add3
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[2*i + 5*n] = ...
-;; ... = A[3*i + 2*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*i + 5*n] = i;
+;; *B++ = A[3*i + 2*n];
define void @symbolicsiv1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 1
%mul1 = mul i64 %n, 5
@@ -61,30 +83,42 @@ for.body: ; preds = %for.body, %entry
%add4 = add i64 %mul2, %mul3
%arrayidx5 = getelementptr inbounds i32* %A, i64 %add4
%0 = load i32* %arrayidx5, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[2*i - n] = ...
-;; ... = A[-i + 2*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*i - n] = i;
+;; *B++ = A[-i + 2*n];
define void @symbolicsiv2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = shl nsw i64 %i.03, 1
%sub = sub i64 %mul, %n
@@ -94,30 +128,42 @@ for.body: ; preds = %for.body, %entry
%add = sub i64 %mul2, %i.03
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[-2*i + n + 1] = ...
-;; ... = A[i - 2*n];
+;; for (long int i = 0; i < n; i++) {
+;; A[-2*i + n + 1] = i;
+;; *B++ = A[i - 2*n];
define void @symbolicsiv3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -2
%add = add i64 %mul, %n
@@ -128,30 +174,42 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 %i.03, %mul2
%arrayidx3 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[-2*i + 3*n] = ...
-;; ... = A[-i + n];
+;; for (long int i = 0; i < n; i++) {
+;; A[-2*i + 3*n] = i;
+;; *B++ = A[-i + n];
define void @symbolicsiv4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -2
%mul1 = mul i64 %n, 3
@@ -161,30 +219,42 @@ for.body: ; preds = %for.body, %entry
%add2 = sub i64 %n, %i.03
%arrayidx3 = getelementptr inbounds i32* %A, i64 %add2
%0 = load i32* %arrayidx3, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long int i = 0; i < n; i++)
-;; A[-2*i - 2*n] = ...
-;; ... = A[-i - n];
+;; for (long int i = 0; i < n; i++) {
+;; A[-2*i - 2*n] = i;
+;; *B++ = A[-i - n];
define void @symbolicsiv5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul nsw i64 %i.03, -2
%mul1 = shl i64 %n, 1
@@ -195,32 +265,44 @@ for.body: ; preds = %for.body, %entry
%sub3 = sub i64 %sub2, %n
%arrayidx4 = getelementptr inbounds i32* %A, i64 %sub3
%0 = load i32* %arrayidx4, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
;; why doesn't SCEV package understand that n >= 0?
-;;void weaktest(int *A, int *B, long unsigned n)
-;; for (long unsigned i = 0; i < n; i++)
-;; A[i + n + 1] = ...
-;; ... = A[-i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[i + n + 1] = i;
+;; *B++ = A[-i];
define void @weaktest(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [*|<] splitable!
+; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, %n
%add1 = add i64 %add, 1
@@ -229,29 +311,36 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 0, %i.03
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [*|<] splitable!
-; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; void symbolicsiv6(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) {
-;; for (long int i = 0; i < n; i++) {
-;; A[4*N*i + M] = i;
-;; *B++ = A[4*N*i + 3*M + 1];
+;; for (long int i = 0; i < n; i++) {
+;; A[4*N*i + M] = i;
+;; *B++ = A[4*N*i + 3*M + 1];
define void @symbolicsiv6(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
br i1 %cmp1, label %for.end, label %for.body.preheader
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
for.body.preheader: ; preds = %entry
br label %for.body
@@ -272,7 +361,6 @@ for.body: ; preds = %for.body.preheader,
%arrayidx7 = getelementptr inbounds i32* %A, i64 %add6
%0 = load i32* %arrayidx7, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
-; CHECK: da analyze - none!
store i32 %0, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
%exitcond = icmp ne i64 %inc, %n
@@ -286,16 +374,22 @@ for.end: ; preds = %for.end.loopexit, %
}
-;; void symbolicsiv7(int *A, int *B, long unsigned n, long unsigned N, long unsigned M) {
-;; for (long int i = 0; i < n; i++) {
-;; A[2*N*i + M] = i;
-;; *B++ = A[2*N*i - 3*M + 2];
+;; for (long int i = 0; i < n; i++) {
+;; A[2*N*i + M] = i;
+;; *B++ = A[2*N*i - 3*M + 2];
define void @symbolicsiv7(i32* %A, i32* %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
br i1 %cmp1, label %for.end, label %for.body.preheader
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [<>]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
for.body.preheader: ; preds = %entry
br label %for.body
@@ -316,7 +410,6 @@ for.body: ; preds = %for.body.preheader,
%arrayidx6 = getelementptr inbounds i32* %A, i64 %add5
%1 = load i32* %arrayidx6, align 4
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
-; CHECK: da analyze - flow [<>]!
store i32 %1, i32* %B.addr.02, align 4
%inc = add nsw i64 %i.03, 1
%exitcond = icmp ne i64 %inc, %n
diff --git a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
index 343e8f49bf..0fc73aa643 100644
--- a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -5,18 +5,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long unsigned i = 0; i < n; i++)
-;; A[1 + n*i] = ...
-;; ... = A[1 - n*i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[1 + n*i] = i;
+;; *B++ = A[1 - n*i];
define void @weakcrossing0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, %n
%add = add i64 %mul, 1
@@ -26,30 +36,43 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 1, %mul1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [0|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[n + i] = ...
-;; ... = A[1 + n - i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[n + i] = i;
+;; *B++ = A[1 + n - i];
define void @weakcrossing1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [<>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 0!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%add = add i64 %i.03, %n
%arrayidx = getelementptr inbounds i32* %A, i64 %add
@@ -58,28 +81,36 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 %add1, %i.03
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - flow [<>] splitable!
-; CHECK: da analyze - split level = 1, iteration = 0!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 3; i++)
-;; A[i] = ...
-;; ... = A[6 - i];
+;; for (long unsigned i = 0; i < 3; i++) {
+;; A[i] = i;
+;; *B++ = A[6 - i];
define void @weakcrossing2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -88,27 +119,33 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 6, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 3
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 3
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 4; i++)
-;; A[i] = ...
-;; ... = A[6 - i];
+;; for (long unsigned i = 0; i < 4; i++) {
+;; A[i] = i;
+;; *B++ = A[6 - i];
define void @weakcrossing3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -117,27 +154,33 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 6, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [0|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 4
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 4
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 10; i++)
-;; A[i] = ...
-;; ... = A[-6 - i];
+;; for (long unsigned i = 0; i < 10; i++) {
+;; A[i] = i;
+;; *B++ = A[-6 - i];
define void @weakcrossing4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -146,30 +189,39 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 -6, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 10
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 10
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[3*i] = ...
-;; ... = A[5 - 3*i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[3*i] = i;
+;; *B++ = A[5 - 3*i];
define void @weakcrossing5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, 3
%arrayidx = getelementptr inbounds i32* %A, i64 %mul
@@ -178,27 +230,37 @@ for.body: ; preds = %for.body, %entry
%sub = add i64 %0, 5
%arrayidx2 = getelementptr inbounds i32* %A, i64 %sub
%1 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %1, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
-for.end: ; preds = %for.body, %entry
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 4; i++)
-;; A[i] = ...
-;; ... = A[5 - i];
+;; for (long unsigned i = 0; i < 4; i++) {
+;; A[i] = i;
+;; *B++ = A[5 - i];
define void @weakcrossing6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [<>] splitable!
+; CHECK: da analyze - split level = 1, iteration = 2!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -207,13 +269,11 @@ for.body: ; preds = %for.body, %entry
%sub = sub i64 5, %i.02
%arrayidx1 = getelementptr inbounds i32* %A, i64 %sub
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [<>] splitable!
-; CHECK: da analyze - split level = 1, iteration = 2!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 4
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 4
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
index a59871602b..e78bc5a139 100644
--- a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
@@ -5,15 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long unsigned i = 0; i < 30; i++)
-;; A[2*i + 10] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 30; i++) {
+;; A[2*i + 10] = i;
+;; *B++ = A[10];
define void @weakzerodst0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -23,30 +30,39 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 30
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 30
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[n*i + 10] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[n*i + 10] = i;
+;; *B++ = A[10];
define void @weakzerodst1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, %n
%add = add i64 %mul, 10
@@ -54,27 +70,36 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 5; i++)
-;; A[2*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 5; i++) {
+;; A[2*i] = i;
+;; *B++ = A[10];
define void @weakzerodst2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -83,27 +108,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 5
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 6; i++)
-;; A[2*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 6; i++) {
+;; A[2*i] = i;
+;; *B++ = A[10];
define void @weakzerodst3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [=>p|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -112,27 +143,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>p|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 6
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[2*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[2*i] = i;
+;; *B++ = A[10];
define void @weakzerodst4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -141,27 +178,33 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[2*i] = ...
-;; ... = A[-10];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[2*i] = i;
+;; *B++ = A[-10];
define void @weakzerodst5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -170,43 +213,54 @@ for.body: ; preds = %for.body, %entry
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 -10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[3*i] = ...
-;; ... = A[10];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[3*i] = i;
+;; *B++ = A[10];
define void @weakzerodst6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%mul = mul i64 %i.03, 3
%arrayidx = getelementptr inbounds i32* %A, i64 %mul
store i32 %conv, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i64 10
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
index fd4f462695..2edba66901 100644
--- a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
@@ -5,15 +5,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; for (long unsigned i = 0; i < 30; i++)
-;; A[10] = ...
-;; ... = A[2*i + 10];
+;; for (long unsigned i = 0; i < 30; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i + 10];
define void @weakzerosrc0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -23,30 +30,39 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %mul, 10
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 30
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 30
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[10] = ...
-;; ... = A[n*i + 10];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[10] = i;
+;; *B++ = A[n*i + 10];
define void @weakzerosrc1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - flow [p<=|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%arrayidx = getelementptr inbounds i32* %A, i64 10
store i32 %conv, i32* %arrayidx, align 4
@@ -54,27 +70,36 @@ for.body: ; preds = %for.body, %entry
%add = add i64 %mul, 10
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [p<=|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
-;; for (long unsigned i = 0; i < 5; i++)
-;; A[10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 5; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -83,27 +108,33 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 5
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 5
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 6; i++)
-;; A[10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 6; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - flow [=>p|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -112,27 +143,33 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [=>p|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 6
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 6
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - flow [*|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -141,27 +178,33 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow [*|<]!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < 7; i++)
-;; A[-10] = ...
-;; ... = A[2*i];
+;; for (long unsigned i = 0; i < 7; i++) {
+;; A[-10] = i;
+;; *B++ = A[2*i];
define void @weakzerosrc5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
br label %for.body
-for.body: ; preds = %for.body, %entry
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body: ; preds = %entry, %for.body
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%B.addr.01 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
%conv = trunc i64 %i.02 to i32
@@ -170,43 +213,54 @@ for.body: ; preds = %for.body, %entry
%mul = shl i64 %i.02, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.01, i64 1
store i32 %0, i32* %B.addr.01, align 4
%inc = add i64 %i.02, 1
- %cmp = icmp ult i64 %inc, 7
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, 7
+ br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
ret void
}
-;; for (long unsigned i = 0; i < n; i++)
-;; A[10] = ...
-;; ... = A[3*i];
+;; for (long unsigned i = 0; i < n; i++) {
+;; A[10] = i;
+;; *B++ = A[3*i];
define void @weakzerosrc6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%cmp1 = icmp eq i64 %n, 0
- br i1 %cmp1, label %for.end, label %for.body
+ br i1 %cmp1, label %for.end, label %for.body.preheader
-for.body: ; preds = %for.body, %entry
- %i.03 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
+; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output [0|<]!
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %B.addr.02 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %for.body.preheader ]
%conv = trunc i64 %i.03 to i32
%arrayidx = getelementptr inbounds i32* %A, i64 10
store i32 %conv, i32* %arrayidx, align 4
%mul = mul i64 %i.03, 3
%arrayidx1 = getelementptr inbounds i32* %A, i64 %mul
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
%incdec.ptr = getelementptr inbounds i32* %B.addr.02, i64 1
store i32 %0, i32* %B.addr.02, align 4
%inc = add i64 %i.03, 1
- %cmp = icmp ult i64 %inc, %n
- br i1 %cmp, label %for.body, label %for.end
+ %exitcond = icmp ne i64 %inc, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
-for.end: ; preds = %for.body, %entry
+for.end: ; preds = %for.end.loopexit, %entry
ret void
}
diff --git a/test/Analysis/DependenceAnalysis/ZIV.ll b/test/Analysis/DependenceAnalysis/ZIV.ll
index 42b2389df2..1e833baf28 100644
--- a/test/Analysis/DependenceAnalysis/ZIV.ll
+++ b/test/Analysis/DependenceAnalysis/ZIV.ll
@@ -5,49 +5,70 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.0"
-;; A[n + 1] = ...
-;; ... = A[1 + n];
+;; A[n + 1] = 0;
+;; *B = A[1 + n];
define void @z0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%add = add i64 %n, 1
%arrayidx = getelementptr inbounds i32* %A, i64 %add
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - consistent flow!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
%add1 = add i64 %n, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
%0 = load i32* %arrayidx2, align 4
-; CHECK: da analyze - consistent flow!
store i32 %0, i32* %B, align 4
ret void
}
-;; A[n] = ...
-;; ... = A[n + 1];
+;; A[n] = 0;
+;; *B = A[n + 1];
define void @z1(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
entry:
%arrayidx = getelementptr inbounds i32* %A, i64 %n
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
%add = add i64 %n, 1
%arrayidx1 = getelementptr inbounds i32* %A, i64 %add
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - none!
store i32 %0, i32* %B, align 4
ret void
}
-;; A[n] = ...
-;; ... = A[m];
+;; A[n] = 0;
+;; *B = A[m];
define void @z2(i32* %A, i32* %B, i64 %n, i64 %m) nounwind uwtable ssp {
entry:
%arrayidx = getelementptr inbounds i32* %A, i64 %n
store i32 0, i32* %arrayidx, align 4
+
+; CHECK: da analyze - consistent output!
+; CHECK: da analyze - flow!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent input!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - consistent output!
+
%arrayidx1 = getelementptr inbounds i32* %A, i64 %m
%0 = load i32* %arrayidx1, align 4
-; CHECK: da analyze - flow!
store i32 %0, i32* %B, align 4
ret void
}
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index ce6866d544..af03fca6d2 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -7,12 +7,12 @@
@C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523)
; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5)
-;; Verify that i16 indices work.
+; Verify that i16 indices work.
@x = external global {i32, i32}
@y = global i32* getelementptr ({ i32, i32 }* @x, i16 42, i32 0)
; CHECK: @y = global i32* getelementptr ({ i32, i32 }* @x, i16 42, i32 0)
-; see if i92 indices work too.
+; See if i92 indices work too.
define i32 *@test({i32, i32}* %t, i92 %n) {
; CHECK: @test
; CHECK: %B = getelementptr { i32, i32 }* %t, i92 %n, i32 0
@@ -20,3 +20,18 @@ define i32 *@test({i32, i32}* %t, i92 %n) {
ret i32* %B
}
+; Verify that constant expression vector GEPs work.
+
+@z = global <2 x i32*> getelementptr (<2 x [3 x {i32, i32}]*> zeroinitializer, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>)
+
+; Verify that struct GEP works with a vector of pointers.
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+ %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
+ ret <2 x i32*> %w
+}
+
+; Verify that array GEP works with a vector of pointers.
+define <2 x i8*> @test8(<2 x [2 x i8]*> %a) {
+ %w = getelementptr <2 x [2 x i8]*> %a, <2 x i32> <i32 0, i32 0>, <2 x i8> <i8 0, i8 1>
+ ret <2 x i8*> %w
+}
diff --git a/test/Assembler/getelementptr_vec_idx1.ll b/test/Assembler/getelementptr_vec_idx1.ll
new file mode 100644
index 0000000000..d2479f4404
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_idx1.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that a vector index is only used with a vector pointer.
+
+; CHECK: getelementptr index type missmatch
+
+define i32 @test(i32* %a) {
+ %w = getelementptr i32* %a, <2 x i32> <i32 5, i32 9>
+ ret i32 %w
+}
diff --git a/test/Assembler/getelementptr_vec_idx2.ll b/test/Assembler/getelementptr_vec_idx2.ll
new file mode 100644
index 0000000000..8b71ce3095
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_idx2.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that a vector pointer is only used with a vector index.
+
+; CHECK: getelementptr index type missmatch
+
+define <2 x i32> @test(<2 x i32*> %a) {
+ %w = getelementptr <2 x i32*> %a, i32 2
+ ret <2 x i32> %w
+}
diff --git a/test/Assembler/getelementptr_vec_idx3.ll b/test/Assembler/getelementptr_vec_idx3.ll
new file mode 100644
index 0000000000..1f6c29b3cc
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_idx3.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that vector indices have the same number of elements as the pointer.
+
+; CHECK: getelementptr index type missmatch
+
+define <4 x i32> @test(<4 x i32>* %a) {
+ %w = getelementptr <4 x i32>* %a, <2 x i32> <i32 5, i32 9>
+ ret i32 %w
+}
diff --git a/test/Assembler/getelementptr_vec_struct.ll b/test/Assembler/getelementptr_vec_struct.ll
new file mode 100644
index 0000000000..ec66836bac
--- /dev/null
+++ b/test/Assembler/getelementptr_vec_struct.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s >/dev/null 2> %t
+; RUN: FileCheck %s < %t
+; Test that a vector struct index with non-equal elements is rejected.
+
+; CHECK: invalid getelementptr indices
+
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+ %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> <i32 0, i32 1>
+ ret <2 x i32*> %w
+}
diff --git a/test/Assembler/global-addrspace-forwardref.ll b/test/Assembler/global-addrspace-forwardref.ll
new file mode 100644
index 0000000000..f0f094a224
--- /dev/null
+++ b/test/Assembler/global-addrspace-forwardref.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Make sure the address space of forward decls is preserved
+
+; CHECK: @a2 = global i8 addrspace(1)* @a
+; CHECK: @a = addrspace(1) global i8 0
+@a2 = global i8 addrspace(1)* @a
+@a = addrspace(1) global i8 0
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e10a532341..e146ae1e65 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,11 +12,8 @@ if(NOT LLVM_BUILD_TOOLS)
set(EXCLUDE_FROM_ALL ON)
endif()
-add_lit_testsuite(check-llvm "Running the LLVM regression tests"
- ${CMAKE_CURRENT_BINARY_DIR}
- PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
- llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
- DEPENDS UnitTests
+# Set the depends list as a variable so that it can grow conditionally.
+set(LLVM_TEST_DEPENDS UnitTests
BugpointPasses LLVMHello
llc lli llvm-ar llvm-as
llvm-bcanalyzer llvm-diff
@@ -30,7 +27,18 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests"
macho-dump opt
profile_rt-shared
FileCheck count not
- yaml2obj
+ yaml2obj)
+
+# If Intel JIT events are supported, depend on a tool that tests the listener.
+if( LLVM_USE_INTEL_JITEVENTS )
+ set(LLVM_TEST_DEPENDS ${LLVM_TEST_DEPENDS} llvm-jitlistener)
+endif( LLVM_USE_INTEL_JITEVENTS )
+
+add_lit_testsuite(check-llvm "Running the LLVM regression tests"
+ ${CMAKE_CURRENT_BINARY_DIR}
+ PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+ llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+ DEPENDS ${LLVM_TEST_DEPENDS}
)
set_target_properties(check-llvm PROPERTIES FOLDER "Tests")
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
index b05ec6367e..ca0964a059 100644
--- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -13,6 +13,7 @@
; CHECK-NOT: ch SU
; CHECK: ch SU(2): Latency=1
; CHECK-NOT: ch SU
+; CHECK: Successors:
; CHECK: ** List Scheduling
; CHECK: SU(2){{.*}}STR{{.*}}
; CHECK-NOT: ch SU
@@ -22,6 +23,7 @@
; CHECK-NOT: ch SU
; CHECK: ch SU(2): Latency=1
; CHECK-NOT: ch SU
+; CHECK: Successors:
define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
entry:
store volatile i32 65540, i32* %p1, align 4, !tbaa !0
diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
new file mode 100644
index 0000000000..38700f3a8d
--- /dev/null
+++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+
+;CHECK: foo
+;CHECK: adds
+;CHECK-NEXT: adc
+;CHECK-NEXT: bx
+
+;rdar://12028498
+
+define i32 @foo() nounwind ssp {
+entry:
+ %tmp2 = zext i32 3 to i64
+ br label %bug_block
+
+bug_block:
+ %tmp410 = and i64 1031, 1647010
+ %tmp411 = and i64 %tmp2, -211
+ %tmp412 = shl i64 %tmp410, %tmp2
+ %tmp413 = shl i64 %tmp411, %tmp2
+ %tmp415 = and i64 %tmp413, 1
+ %tmp420 = xor i64 0, %tmp415
+ %tmp421 = and i64 %tmp412, %tmp415
+ %tmp422 = shl i64 %tmp421, 1
+ br label %finish
+
+finish:
+ %tmp423 = lshr i64 %tmp422, 32
+ %tmp424 = trunc i64 %tmp423 to i32
+ ret i32 %tmp424
+}
+
diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll
new file mode 100644
index 0000000000..8b5087f89c
--- /dev/null
+++ b/test/CodeGen/ARM/arm-ttype-target2.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00"
+@_ZTI3Foo = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([5 x i8]* @_ZTS3Foo, i32 0, i32 0) }
+
+define i32 @main() {
+entry:
+ invoke void @_Z3foov()
+ to label %return unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 1
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI3Foo to i8*)) nounwind
+; CHECK: _ZTI3Foo(target2)
+
+ %matches = icmp eq i32 %1, %2
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %3 = extractvalue { i8*, i32 } %0, 0
+ %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
+ tail call void @__cxa_end_catch()
+ br label %return
+
+return: ; preds = %entry, %catch
+ %retval.0 = phi i32 [ 1, %catch ], [ 0, %entry ]
+ ret i32 %retval.0
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %0
+}
+
+declare void @_Z3foov()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index e9609ac0f9..be51e3c129 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
define i64 @test1(i64* %ptr, i64 %val) {
-; CHECK: test1
+; CHECK: test1:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: adds r0, r2
-; CHECK: adc r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: adds [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: adc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -15,12 +15,12 @@ define i64 @test1(i64* %ptr, i64 %val) {
}
define i64 @test2(i64* %ptr, i64 %val) {
-; CHECK: test2
+; CHECK: test2:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: subs r0, r2
-; CHECK: sbc r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -29,12 +29,12 @@ define i64 @test2(i64* %ptr, i64 %val) {
}
define i64 @test3(i64* %ptr, i64 %val) {
-; CHECK: test3
+; CHECK: test3:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: and r0, r2
-; CHECK: and r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -43,12 +43,12 @@ define i64 @test3(i64* %ptr, i64 %val) {
}
define i64 @test4(i64* %ptr, i64 %val) {
-; CHECK: test4
+; CHECK: test4:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: orr r0, r2
-; CHECK: orr r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -57,12 +57,12 @@ define i64 @test4(i64* %ptr, i64 %val) {
}
define i64 @test5(i64* %ptr, i64 %val) {
-; CHECK: test5
+; CHECK: test5:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: eor r0, r2
-; CHECK: eor r1, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -71,10 +71,10 @@ define i64 @test5(i64* %ptr, i64 %val) {
}
define i64 @test6(i64* %ptr, i64 %val) {
-; CHECK: test6
+; CHECK: test6:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -83,13 +83,13 @@ define i64 @test6(i64* %ptr, i64 %val) {
}
define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
-; CHECK: test7
+; CHECK: test7:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: cmp r2
-; CHECK: cmpeq r3
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: cmp [[REG1]]
+; CHECK: cmpeq [[REG2]]
; CHECK: bne
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -100,12 +100,12 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; Compiles down to cmpxchg
; FIXME: Should compile to a single ldrexd
define i64 @test8(i64* %ptr) {
-; CHECK: test8
-; CHECK: ldrexd r2, r3
-; CHECK: cmp r2
-; CHECK: cmpeq r3
+; CHECK: test8:
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: cmp [[REG1]]
+; CHECK: cmpeq [[REG2]]
; CHECK: bne
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
@@ -116,10 +116,10 @@ define i64 @test8(i64* %ptr) {
; Compiles down to atomicrmw xchg; there really isn't any more efficient
; way to write it.
define void @test9(i64* %ptr, i64 %val) {
-; CHECK: test9
+; CHECK: test9:
; CHECK: dmb ish
-; CHECK: ldrexd r2, r3
-; CHECK: strexd {{[a-z0-9]+}}, r0, r1
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
; CHECK: bne
; CHECK: dmb ish
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 14511ad5ce..5bdad1d838 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -147,7 +147,7 @@ if.end: ; preds = %entry, %if.then
; CHECK: vmov.f32 {{.*}}, #1.0
; CHECK-NOT: vmov
; CHECK-NOT: vorr
-; CHECK: %if.end
+; CHECK: bx
; We may leave the last insertelement in the if.end block.
; It is inserting the %add value into a dead lane, but %add causes interference
; in the entry block, and we don't do dead lane checks across basic blocks.
diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll
index a5c4114458..0ebac94e13 100644
--- a/test/CodeGen/ARM/domain-conv-vmovs.ll
+++ b/test/CodeGen/ARM/domain-conv-vmovs.ll
@@ -98,3 +98,23 @@ define i32 @test_vmovs_no_sreg(i32 %in) {
ret i32 %resi
}
+
+
+; The point of this test is:
+; + Make sure s1 is live before the BL
+; + Make sure s1 is clobbered by the BL
+; + Convince LLVM to emit a VMOV to S0
+; + Convince LLVM to domain-convert this.
+
+; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
+; because it's dead.
+
+declare float @clobbers_s1(float, float)
+
+define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
+ %elt = call float @clobbers_s1(float %val, float %val)
+
+ %vec = insertelement <2 x float> %invec, float %elt, i32 0
+ %res = fadd <2 x float> %vec, %vec
+ ret <2 x float> %res
+}
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
new file mode 100644
index 0000000000..d15aa7b32c
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -0,0 +1,77 @@
+; RUN: llc -arm-enable-ehabi -arm-enable-ehabi-descriptors < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+@_ZTIi = external constant i8*
+
+declare void @_Z3foov() noreturn;
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+declare void @__cxa_call_unexpected(i8*)
+
+define i32 @main() {
+; CHECK: main:
+entry:
+ %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
+ %0 = bitcast i8* %exception.i to i32*
+ store i32 42, i32* %0, align 4, !tbaa !0
+ invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
+ to label %unreachable.i unwind label %lpad.i
+
+lpad.i: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ filter [1 x i8*] [i8* bitcast (i8** @_ZTIi to i8*)]
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK: .long _ZTIi(target2) @ TypeInfo 1
+; CHECK: .long _ZTIi(target2) @ FilterInfo -1
+ %2 = extractvalue { i8*, i32 } %1, 1
+ %ehspec.fails.i = icmp slt i32 %2, 0
+ br i1 %ehspec.fails.i, label %ehspec.unexpected.i, label %lpad.body
+
+ehspec.unexpected.i: ; preds = %lpad.i
+ %3 = extractvalue { i8*, i32 } %1, 0
+ invoke void @__cxa_call_unexpected(i8* %3) noreturn
+ to label %.noexc unwind label %lpad
+
+.noexc: ; preds = %ehspec.unexpected.i
+ unreachable
+
+unreachable.i: ; preds = %entry
+ unreachable
+
+lpad: ; preds = %ehspec.unexpected.i
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ br label %lpad.body
+
+lpad.body: ; preds = %lpad.i, %lpad
+ %eh.lpad-body = phi { i8*, i32 } [ %4, %lpad ], [ %1, %lpad.i ]
+ %5 = extractvalue { i8*, i32 } %eh.lpad-body, 1
+ %6 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+ %matches = icmp eq i32 %5, %6
+ br i1 %matches, label %try.cont, label %eh.resume
+
+try.cont: ; preds = %lpad.body
+ %7 = extractvalue { i8*, i32 } %eh.lpad-body, 0
+ %8 = tail call i8* @__cxa_begin_catch(i8* %7) nounwind
+ tail call void @__cxa_end_catch() nounwind
+ ret i32 0
+
+eh.resume: ; preds = %lpad.body
+ resume { i8*, i32 } %eh.lpad-body
+}
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/ehabi-no-landingpad.ll b/test/CodeGen/ARM/ehabi-no-landingpad.ll
new file mode 100644
index 0000000000..ac0dff421a
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-no-landingpad.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=armv7-unknown-linux-gnueabi \
+; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-unknown-linux-gnueabi"
+
+define void @_Z4testv() {
+; CHECK: _Z4testv
+; CHECK: .fnstart
+; CHECK: .size
+; CHECK-NOT: .handlerdata
+; CHECK: .fnend
+entry:
+ call void @_Z15throw_exceptionv()
+ ret void
+}
+
+declare void @_Z15throw_exceptionv()
diff --git a/test/CodeGen/ARM/fabs-neon.ll b/test/CodeGen/ARM/fabs-neon.ll
new file mode 100644
index 0000000000..614117ff7b
--- /dev/null
+++ b/test/CodeGen/ARM/fabs-neon.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=armv7-eabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s
+
+; CHECK: test:
+; CHECK: vabs.f32 q0, q0
+define <4 x float> @test(<4 x float> %a) {
+ %foo = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ ret <4 x float> %foo
+}
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+
+; CHECK: test2:
+; CHECK: vabs.f32 d0, d0
+define <2 x float> @test2(<2 x float> %a) {
+ %foo = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+ ret <2 x float> %foo
+}
+declare <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
index dbb634df0a..60bc6a62f5 100644
--- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index 7c532d5fba..4e6efd2489 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
index 14721a4d80..b6f201728c 100644
--- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Fast-isel can't handle non-double multi-reg retvals.
; This test just check to make sure we don't hit the assert in FinishCall.
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
index 370c70f174..8fb4b66b7d 100644
--- a/test/CodeGen/ARM/fast-isel-crash.ll
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
%union.anon = type { <16 x i32> }
diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll
index aa06299288..f245168a8e 100644
--- a/test/CodeGen/ARM/fast-isel-crash2.ll
+++ b/test/CodeGen/ARM/fast-isel-crash2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin
; rdar://9515076
; (Make sure this doesn't crash.)
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 7e147c7b4d..3a943d854b 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Target-specific selector can't properly handle the double because it isn't
; being passed via a register, so the materialized arguments become dead code.
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
index 61bd18504c..7a65295f01 100644
--- a/test/CodeGen/ARM/fast-isel-fold.ll
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
@a = global i8 1, align 1
@b = global i16 2, align 2
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
index 8f7b2943b5..c256e73ab9 100644
--- a/test/CodeGen/ARM/fast-isel-frameaddr.ll
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
define i8* @frameaddr_index0() nounwind {
entry:
diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll
new file mode 100644
index 0000000000..1948ad8471
--- /dev/null
+++ b/test/CodeGen/ARM/neon_fpconv.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; PR12540: ARM backend lowering of FP_ROUND v2f64 to v2f32.
+define <2 x float> @vtrunc(<2 x double> %a) {
+; CHECK: vcvt.f32.f64 [[S0:s[0-9]+]], [[D0:d[0-9]+]]
+; CHECK: vcvt.f32.f64 [[S1:s[0-9]+]], [[D1:d[0-9]+]]
+ %vt = fptrunc <2 x double> %a to <2 x float>
+ ret <2 x float> %vt
+}
+
+define <2 x double> @vextend(<2 x float> %a) {
+; CHECK: vcvt.f64.f32 [[D0:d[0-9]+]], [[S0:s[0-9]+]]
+; CHECK: vcvt.f64.f32 [[D1:d[0-9]+]], [[S1:s[0-9]+]]
+ %ve = fpext <2 x float> %a to <2 x double>
+ ret <2 x double> %ve
+}
+
diff --git a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll b/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
deleted file mode 100644
index 35422311c5..0000000000
--- a/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=cellspu -o - | grep brz
-; PR3274
-
-target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
-target triple = "spu"
- %struct.anon = type { i64 }
- %struct.fp_number_type = type { i32, i32, i32, [4 x i8], %struct.anon }
-
-define double @__floatunsidf(i32 %arg_a) nounwind {
-entry:
- %in = alloca %struct.fp_number_type, align 16
- %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1
- store i32 0, i32* %0, align 4
- %1 = icmp eq i32 %arg_a, 0
- %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0
- br i1 %1, label %bb, label %bb1
-
-bb: ; preds = %entry
- store i32 2, i32* %2, align 8
- br label %bb7
-
-bb1: ; preds = %entry
- ret double 0.0
-
-bb7: ; preds = %bb5, %bb1, %bb
- ret double 1.0
-}
-
-; declare i32 @llvm.ctlz.i32(i32) nounwind readnone
-
-declare double @__pack_d(%struct.fp_number_type*)
diff --git a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 401399face..0000000000
--- a/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=cellspu -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
deleted file mode 100644
index 4203e91068..0000000000
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and %t1.s | count 234
-; RUN: grep andc %t1.s | count 85
-; RUN: grep andi %t1.s | count 37
-; RUN: grep andhi %t1.s | count 30
-; RUN: grep andbi %t1.s | count 4
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; AND instruction generation:
-define <4 x i32> @and_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg1, %arg2
- ret <4 x i32> %A
-}
-
-define <4 x i32> @and_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg2, %arg1
- ret <4 x i32> %A
-}
-
-define <8 x i16> @and_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg1, %arg2
- ret <8 x i16> %A
-}
-
-define <8 x i16> @and_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg2, %arg1
- ret <8 x i16> %A
-}
-
-define <16 x i8> @and_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg2, %arg1
- ret <16 x i8> %A
-}
-
-define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg1, %arg2
- ret <16 x i8> %A
-}
-
-define i32 @and_i32_1(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg2, %arg1
- ret i32 %A
-}
-
-define i32 @and_i32_2(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i16 @and_i16_1(i16 %arg1, i16 %arg2) {
- %A = and i16 %arg2, %arg1
- ret i16 %A
-}
-
-define i16 @and_i16_2(i16 %arg1, i16 %arg2) {
- %A = and i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i8 @and_i8_1(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg2, %arg1
- ret i8 %A
-}
-
-define i8 @and_i8_2(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg1, %arg2
- ret i8 %A
-}
-
-; ANDC instruction generation:
-define <4 x i32> @andc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %arg1, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @andc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %arg2, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @andc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %arg2
- ret <4 x i32> %B
-}
-
-define <8 x i16> @andc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %arg1, %A
- ret <8 x i16> %B
-}
-
-define <8 x i16> @andc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %arg2, %A
- ret <8 x i16> %B
-}
-
-define <16 x i8> @andc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %arg2, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @andc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %arg1, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %arg1
- ret <16 x i8> %B
-}
-
-define i32 @andc_i32_1(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = and i32 %A, %arg1
- ret i32 %B
-}
-
-define i32 @andc_i32_2(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg1, -1
- %B = and i32 %A, %arg2
- ret i32 %B
-}
-
-define i32 @andc_i32_3(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = and i32 %arg1, %A
- ret i32 %B
-}
-
-define i16 @andc_i16_1(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = and i16 %A, %arg1
- ret i16 %B
-}
-
-define i16 @andc_i16_2(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg1, -1
- %B = and i16 %A, %arg2
- ret i16 %B
-}
-
-define i16 @andc_i16_3(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = and i16 %arg1, %A
- ret i16 %B
-}
-
-define i8 @andc_i8_1(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = and i8 %A, %arg1
- ret i8 %B
-}
-
-define i8 @andc_i8_2(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg1, -1
- %B = and i8 %A, %arg2
- ret i8 %B
-}
-
-define i8 @andc_i8_3(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = and i8 %arg1, %A
- ret i8 %B
-}
-
-; ANDI instruction generation (i32 data type):
-define <4 x i32> @andi_v4i32_1(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @andi_v4i32_2(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @andi_v4i32_3(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @andi_v4i32_4(<4 x i32> %in) {
- %tmp2 = and <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 >
- ret <4 x i32> %tmp2
-}
-
-define zeroext i32 @andi_u32(i32 zeroext %in) {
- %tmp37 = and i32 %in, 37
- ret i32 %tmp37
-}
-
-define signext i32 @andi_i32(i32 signext %in) {
- %tmp38 = and i32 %in, 37
- ret i32 %tmp38
-}
-
-define i32 @andi_i32_1(i32 %in) {
- %tmp37 = and i32 %in, 37
- ret i32 %tmp37
-}
-
-; ANDHI instruction generation (i16 data type):
-define <8 x i16> @andhi_v8i16_1(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511,
- i16 511, i16 511, i16 511, i16 511 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @andhi_v8i16_2(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510,
- i16 510, i16 510, i16 510, i16 510 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @andhi_v8i16_3(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) {
- %tmp2 = and <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512,
- i16 -512, i16 -512, i16 -512, i16 -512 >
- ret <8 x i16> %tmp2
-}
-
-define zeroext i16 @andhi_u16(i16 zeroext %in) {
- %tmp37 = and i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp37
-}
-
-define signext i16 @andhi_i16(i16 signext %in) {
- %tmp38 = and i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp38
-}
-
-; i8 data type (s/b ANDBI if 8-bit registers were supported):
-define <16 x i8> @and_v16i8(<16 x i8> %in) {
- ; ANDBI generated for vector types
- %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42 >
- ret <16 x i8> %tmp2
-}
-
-define zeroext i8 @and_u8(i8 zeroext %in) {
- ; ANDBI generated:
- %tmp37 = and i8 %in, 37
- ret i8 %tmp37
-}
-
-define signext i8 @and_sext8(i8 signext %in) {
- ; ANDBI generated
- %tmp38 = and i8 %in, 37
- ret i8 %tmp38
-}
-
-define i8 @and_i8(i8 %in) {
- ; ANDBI generated
- %tmp38 = and i8 %in, 205
- ret i8 %tmp38
-}
diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll
deleted file mode 100644
index 7410b724d6..0000000000
--- a/test/CodeGen/CellSPU/arg_ret.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; Test parameter passing and return values
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
-
-; this fits into registers r3-r74
-%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
- i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32}
-define ccc i32 @test_regs( %paramstruct %prm )
-{
-;CHECK: lr $3, $74
-;CHECK: bi $lr
- %1 = extractvalue %paramstruct %prm, 71
- ret i32 %1
-}
-
-define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm )
-{
-;CHECK-NOT: a $3, $74, $75
- %1 = extractvalue %paramstruct %prm, 71
- %2 = add i32 %1, %stackprm
- ret i32 %2
-}
-
-define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm )
-{
-;CHECK: lqd {{\$[0-9]+}}, 80($sp)
-;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}}
-;CHECK: lr $3, $4
- ret %paramstruct %prm
-}
-
diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll
deleted file mode 100644
index 63293e2aec..0000000000
--- a/test/CodeGen/CellSPU/bigstack.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep lqx %t1.s | count 3
-; RUN: grep il %t1.s | grep -v file | count 5
-; RUN: grep stqx %t1.s | count 1
-
-define i32 @bigstack() nounwind {
-entry:
- %avar = alloca i32
- %big_data = alloca [2048 x i32]
- store i32 3840, i32* %avar, align 4
- br label %return
-
-return:
- %retval = load i32* %avar
- ret i32 %retval
-}
-
diff --git a/test/CodeGen/CellSPU/bss.ll b/test/CodeGen/CellSPU/bss.ll
deleted file mode 100644
index 327800d09c..0000000000
--- a/test/CodeGen/CellSPU/bss.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-@bssVar = global i32 zeroinitializer
-; CHECK: .section .bss
-; CHECK-NEXT: .globl
-
-@localVar= internal global i32 zeroinitializer
-; CHECK-NOT: .lcomm
-; CHECK: .local
-; CHECK-NEXT: .comm
-
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
deleted file mode 100644
index 11cf770145..0000000000
--- a/test/CodeGen/CellSPU/call.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i32 @main() {
-entry:
- %a = call i32 @stub_1(i32 1, float 0x400921FA00000000)
- call void @extern_stub_1(i32 %a, i32 4)
- ret i32 %a
-}
-
-declare void @extern_stub_1(i32, i32)
-
-define i32 @stub_1(i32 %x, float %y) {
- ; CHECK: il $3, 0
- ; CHECK: bi $lr
-entry:
- ret i32 0
-}
-
-; vararg call: ensure that all caller-saved registers are spilled to the
-; stack:
-define i32 @stub_2(...) {
-entry:
- ret i32 0
-}
-
-; check that struct is passed in r3->
-; assert this by changing the second field in the struct
-%0 = type { i32, i32, i32 }
-declare %0 @callee()
-define %0 @test_structret()
-{
-;CHECK: stqd $lr, 16($sp)
-;CHECK: stqd $sp, -48($sp)
-;CHECK: ai $sp, $sp, -48
-;CHECK: brasl $lr, callee
- %rv = call %0 @callee()
-;CHECK: ai $4, $4, 1
-;CHECK: lqd $lr, 64($sp)
-;CHECK: ai $sp, $sp, 48
-;CHECK: bi $lr
- %oldval = extractvalue %0 %rv, 1
- %newval = add i32 %oldval,1
- %newrv = insertvalue %0 %rv, i32 %newval, 1
- ret %0 %newrv
-}
-
diff --git a/test/CodeGen/CellSPU/crash.ll b/test/CodeGen/CellSPU/crash.ll
deleted file mode 100644
index cc2ab71db3..0000000000
--- a/test/CodeGen/CellSPU/crash.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc %s -march=cellspu -o -
-declare i8 @return_i8()
-declare i16 @return_i16()
-define void @testfunc() {
- %rv1 = call i8 @return_i8()
- %rv2 = call i16 @return_i16()
- ret void
-} \ No newline at end of file
diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll
deleted file mode 100644
index e1a6cd8292..0000000000
--- a/test/CodeGen/CellSPU/ctpop.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep cntb %t1.s | count 3
-; RUN: grep andi %t1.s | count 3
-; RUN: grep rotmi %t1.s | count 2
-; RUN: grep rothmi %t1.s | count 1
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare i8 @llvm.ctpop.i8(i8)
-declare i16 @llvm.ctpop.i16(i16)
-declare i32 @llvm.ctpop.i32(i32)
-
-define i32 @test_i8(i8 %X) {
- call i8 @llvm.ctpop.i8(i8 %X)
- %Y = zext i8 %1 to i32
- ret i32 %Y
-}
-
-define i32 @test_i16(i16 %X) {
- call i16 @llvm.ctpop.i16(i16 %X)
- %Y = zext i16 %1 to i32
- ret i32 %Y
-}
-
-define i32 @test_i32(i32 %X) {
- call i32 @llvm.ctpop.i32(i32 %X)
- %Y = bitcast i32 %1 to i32
- ret i32 %Y
-}
-
diff --git a/test/CodeGen/CellSPU/div_ops.ll b/test/CodeGen/CellSPU/div_ops.ll
deleted file mode 100644
index 0c93d83ca7..0000000000
--- a/test/CodeGen/CellSPU/div_ops.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc --march=cellspu %s -o - | FileCheck %s
-
-; signed division rounds towards zero, rotma don't.
-define i32 @sdivide (i32 %val )
-{
-; CHECK: rotmai
-; CHECK: rotmi
-; CHECK: a
-; CHECK: rotmai
-; CHECK: bi $lr
- %rv = sdiv i32 %val, 4
- ret i32 %rv
-}
-
-define i32 @udivide (i32 %val )
-{
-; CHECK: rotmi
-; CHECK: bi $lr
- %rv = udiv i32 %val, 4
- ret i32 %rv
-}
-
diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll
deleted file mode 100644
index 66bff3eb78..0000000000
--- a/test/CodeGen/CellSPU/dp_farith.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep dfa %t1.s | count 2
-; RUN: grep dfs %t1.s | count 2
-; RUN: grep dfm %t1.s | count 6
-; RUN: grep dfma %t1.s | count 2
-; RUN: grep dfms %t1.s | count 2
-; RUN: grep dfnms %t1.s | count 4
-;
-; This file includes double precision floating point arithmetic instructions
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define double @fadd(double %arg1, double %arg2) {
- %A = fadd double %arg1, %arg2
- ret double %A
-}
-
-define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = fadd <2 x double> %arg1, %arg2
- ret <2 x double> %A
-}
-
-define double @fsub(double %arg1, double %arg2) {
- %A = fsub double %arg1, %arg2
- ret double %A
-}
-
-define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = fsub <2 x double> %arg1, %arg2
- ret <2 x double> %A
-}
-
-define double @fmul(double %arg1, double %arg2) {
- %A = fmul double %arg1, %arg2
- ret double %A
-}
-
-define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) {
- %A = fmul <2 x double> %arg1, %arg2
- ret <2 x double> %A
-}
-
-define double @fma(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fadd double %A, %arg3
- ret double %B
-}
-
-define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2
- %B = fadd <2 x double> %A, %arg3
- ret <2 x double> %B
-}
-
-define double @fms(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fsub double %A, %arg3
- ret double %B
-}
-
-define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2
- %B = fsub <2 x double> %A, %arg3
- ret <2 x double> %B
-}
-
-; - (a * b - c)
-define double @d_fnms_1(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fsub double %A, %arg3
- %C = fsub double -0.000000e+00, %B ; <double> [#uses=1]
- ret double %C
-}
-
-; Annother way of getting fnms
-; - ( a * b ) + c => c - (a * b)
-define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
- %A = fmul double %arg1, %arg2
- %B = fsub double %arg3, %A
- ret double %B
-}
-
-; FNMS: - (a * b - c) => c - (a * b)
-define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2
- %B = fsub <2 x double> %arg3, %A
- ret <2 x double> %B
-}
-
-; Another way to get fnms using a constant vector
-; - ( a * b - c)
-define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
- %A = fmul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1]
- %B = fsub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1]
- %C = fsub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B
- ret <2 x double> %C
-}
-
-;define double @fdiv_1(double %arg1, double %arg2) {
-; %A = fdiv double %arg1, %arg2 ; <double> [#uses=1]
-; ret double %A
-;}
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
deleted file mode 100644
index 79676814f2..0000000000
--- a/test/CodeGen/CellSPU/eqv.ll
+++ /dev/null
@@ -1,152 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep eqv %t1.s | count 18
-; RUN: grep xshw %t1.s | count 6
-; RUN: grep xsbh %t1.s | count 3
-; RUN: grep andi %t1.s | count 3
-
-; Test the 'eqv' instruction, whose boolean expression is:
-; (a & b) | (~a & ~b), which simplifies to
-; (a & b) | ~(a | b)
-; Alternatively, a ^ ~b, which the compiler will also match.
-
-; ModuleID = 'eqv.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg1, %arg2
- %B = or <4 x i32> %arg1, %arg2
- %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %C = or <4 x i32> %A, %Bnot
- ret <4 x i32> %C
-}
-
-define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %C
-}
-
-define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
- %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %C
-}
-
-define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) {
- %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %C = xor <4 x i32> %arg1, %arg2not
- ret <4 x i32> %C
-}
-
-define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
- %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
- %C = or i32 %A, %Bnot ; <i32> [#uses=1]
- ret i32 %C
-}
-
-define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) {
- %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
- %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %C = or i32 %A, %Bnot ; <i32> [#uses=1]
- ret i32 %C
-}
-
-define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) {
- %B = or i32 %arg1, %arg2 ; <i32> [#uses=1]
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %Bnot = xor i32 %B, -1 ; <i32> [#uses=1]
- %C = or i32 %A, %Bnot ; <i32> [#uses=1]
- ret i32 %C
-}
-
-define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) {
- %arg2not = xor i32 %arg2, -1
- %C = xor i32 %arg1, %arg2not
- ret i32 %C
-}
-
-define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) {
- %arg1not = xor i32 %arg1, -1
- %C = xor i32 %arg2, %arg1not
- ret i32 %C
-}
-
-define signext i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) {
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
- %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
- %C = or i16 %A, %Bnot ; <i16> [#uses=1]
- ret i16 %C
-}
-
-define signext i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) {
- %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
- %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %C = or i16 %A, %Bnot ; <i16> [#uses=1]
- ret i16 %C
-}
-
-define signext i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) {
- %B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
- %C = or i16 %A, %Bnot ; <i16> [#uses=1]
- ret i16 %C
-}
-
-define signext i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define signext i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define signext i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define zeroext i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define zeroext i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
-
-define zeroext i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) {
- %B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
- %C = or i8 %A, %Bnot ; <i8> [#uses=1]
- ret i8 %C
-}
diff --git a/test/CodeGen/CellSPU/extract_elt.ll b/test/CodeGen/CellSPU/extract_elt.ll
deleted file mode 100644
index 0ac971c58c..0000000000
--- a/test/CodeGen/CellSPU/extract_elt.ll
+++ /dev/null
@@ -1,277 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep shufb %t1.s | count 39
-; RUN: grep ilhu %t1.s | count 27
-; RUN: grep iohl %t1.s | count 27
-; RUN: grep lqa %t1.s | count 10
-; RUN: grep shlqby %t1.s | count 12
-; RUN: grep 515 %t1.s | count 1
-; RUN: grep 1029 %t1.s | count 2
-; RUN: grep 1543 %t1.s | count 2
-; RUN: grep 2057 %t1.s | count 2
-; RUN: grep 2571 %t1.s | count 2
-; RUN: grep 3085 %t1.s | count 2
-; RUN: grep 3599 %t1.s | count 2
-; RUN: grep 32768 %t1.s | count 1
-; RUN: grep 32769 %t1.s | count 1
-; RUN: grep 32770 %t1.s | count 1
-; RUN: grep 32771 %t1.s | count 1
-; RUN: grep 32772 %t1.s | count 1
-; RUN: grep 32773 %t1.s | count 1
-; RUN: grep 32774 %t1.s | count 1
-; RUN: grep 32775 %t1.s | count 1
-; RUN: grep 32776 %t1.s | count 1
-; RUN: grep 32777 %t1.s | count 1
-; RUN: grep 32778 %t1.s | count 1
-; RUN: grep 32779 %t1.s | count 1
-; RUN: grep 32780 %t1.s | count 1
-; RUN: grep 32781 %t1.s | count 1
-; RUN: grep 32782 %t1.s | count 1
-; RUN: grep 32783 %t1.s | count 1
-; RUN: grep 32896 %t1.s | count 24
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i32 @i32_extract_0(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 0
- ret i32 %a
-}
-
-define i32 @i32_extract_1(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 1
- ret i32 %a
-}
-
-define i32 @i32_extract_2(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 2
- ret i32 %a
-}
-
-define i32 @i32_extract_3(<4 x i32> %v) {
-entry:
- %a = extractelement <4 x i32> %v, i32 3
- ret i32 %a
-}
-
-define i16 @i16_extract_0(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 0
- ret i16 %a
-}
-
-define i16 @i16_extract_1(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 1
- ret i16 %a
-}
-
-define i16 @i16_extract_2(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 2
- ret i16 %a
-}
-
-define i16 @i16_extract_3(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 3
- ret i16 %a
-}
-
-define i16 @i16_extract_4(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 4
- ret i16 %a
-}
-
-define i16 @i16_extract_5(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 5
- ret i16 %a
-}
-
-define i16 @i16_extract_6(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 6
- ret i16 %a
-}
-
-define i16 @i16_extract_7(<8 x i16> %v) {
-entry:
- %a = extractelement <8 x i16> %v, i32 7
- ret i16 %a
-}
-
-define i8 @i8_extract_0(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 0
- ret i8 %a
-}
-
-define i8 @i8_extract_1(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 1
- ret i8 %a
-}
-
-define i8 @i8_extract_2(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 2
- ret i8 %a
-}
-
-define i8 @i8_extract_3(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 3
- ret i8 %a
-}
-
-define i8 @i8_extract_4(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 4
- ret i8 %a
-}
-
-define i8 @i8_extract_5(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 5
- ret i8 %a
-}
-
-define i8 @i8_extract_6(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 6
- ret i8 %a
-}
-
-define i8 @i8_extract_7(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 7
- ret i8 %a
-}
-
-define i8 @i8_extract_8(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 8
- ret i8 %a
-}
-
-define i8 @i8_extract_9(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 9
- ret i8 %a
-}
-
-define i8 @i8_extract_10(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 10
- ret i8 %a
-}
-
-define i8 @i8_extract_11(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 11
- ret i8 %a
-}
-
-define i8 @i8_extract_12(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 12
- ret i8 %a
-}
-
-define i8 @i8_extract_13(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 13
- ret i8 %a
-}
-
-define i8 @i8_extract_14(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 14
- ret i8 %a
-}
-
-define i8 @i8_extract_15(<16 x i8> %v) {
-entry:
- %a = extractelement <16 x i8> %v, i32 15
- ret i8 %a
-}
-
-;;--------------------------------------------------------------------------
-;; extract element, variable index:
-;;--------------------------------------------------------------------------
-
-define i8 @extract_varadic_i8(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <16 x i8> < i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i32 %i
- ret i8 %0
-}
-
-define i8 @extract_varadic_i8_1(<16 x i8> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <16 x i8> %v, i32 %i
- ret i8 %0
-}
-
-define i16 @extract_varadic_i16(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <8 x i16> < i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i32 %i
- ret i16 %0
-}
-
-define i16 @extract_varadic_i16_1(<8 x i16> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <8 x i16> %v, i32 %i
- ret i16 %0
-}
-
-define i32 @extract_varadic_i32(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x i32> < i32 0, i32 1, i32 2, i32 3>, i32 %i
- ret i32 %0
-}
-
-define i32 @extract_varadic_i32_1(<4 x i32> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x i32> %v, i32 %i
- ret i32 %0
-}
-
-define float @extract_varadic_f32(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, i32 %i
- ret float %0
-}
-
-define float @extract_varadic_f32_1(<4 x float> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <4 x float> %v, i32 %i
- ret float %0
-}
-
-define i64 @extract_varadic_i64(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x i64> < i64 0, i64 1>, i32 %i
- ret i64 %0
-}
-
-define i64 @extract_varadic_i64_1(<2 x i64> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x i64> %v, i32 %i
- ret i64 %0
-}
-
-define double @extract_varadic_f64(i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x double> < double 1.000000e+00, double 2.000000e+00>, i32 %i
- ret double %0
-}
-
-define double @extract_varadic_f64_1(<2 x double> %v, i32 %i) nounwind readnone {
-entry:
- %0 = extractelement <2 x double> %v, i32 %i
- ret double %0
-}
diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll
deleted file mode 100644
index f6b028dbb8..0000000000
--- a/test/CodeGen/CellSPU/fcmp32.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s
-
-; Exercise the floating point comparison operators for f32:
-
-declare double @fabs(double)
-declare float @fabsf(float)
-
-define i1 @fcmp_eq(float %arg1, float %arg2) {
-; CHECK: fceq
-; CHECK: bi $lr
- %A = fcmp oeq float %arg1, %arg2
- ret i1 %A
-}
-
-define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
-; CHECK: fcmeq
-; CHECK: bi $lr
- %1 = call float @fabsf(float %arg1) readnone
- %2 = call float @fabsf(float %arg2) readnone
- %3 = fcmp oeq float %1, %2
- ret i1 %3
-}
-
-define i1 @test_ogt(float %a, float %b) {
-; CHECK: fcgt
-; CHECK: bi $lr
- %cmp = fcmp ogt float %a, %b
- ret i1 %cmp
-}
-
-define i1 @test_ugt(float %a, float %b) {
-; CHECK: fcgt
-; CHECK: bi $lr
- %cmp = fcmp ugt float %a, %b
- ret i1 %cmp
-}
diff --git a/test/CodeGen/CellSPU/fcmp64.ll b/test/CodeGen/CellSPU/fcmp64.ll
deleted file mode 100644
index 2b61fa6d2d..0000000000
--- a/test/CodeGen/CellSPU/fcmp64.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-
-define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
-entry:
- %A = fcmp oeq double %arg1, %arg2
- ret i1 %A
-}
diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll
deleted file mode 100644
index 9921626b79..0000000000
--- a/test/CodeGen/CellSPU/fdiv.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep frest %t1.s | count 2
-; RUN: grep -w fi %t1.s | count 2
-; RUN: grep -w fm %t1.s | count 2
-; RUN: grep fma %t1.s | count 2
-; RUN: grep fnms %t1.s | count 4
-; RUN: grep cgti %t1.s | count 2
-; RUN: grep selb %t1.s | count 2
-;
-; This file includes standard floating point arithmetic instructions
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define float @fdiv32(float %arg1, float %arg2) {
- %A = fdiv float %arg1, %arg2
- ret float %A
-}
-
-define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fdiv <4 x float> %arg1, %arg2
- ret <4 x float> %A
-}
diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll
deleted file mode 100644
index 6e01906dae..0000000000
--- a/test/CodeGen/CellSPU/fneg-fabs.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep 32768 %t1.s | count 2
-; RUN: grep xor %t1.s | count 4
-; RUN: grep and %t1.s | count 2
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define double @fneg_dp(double %X) {
- %Y = fsub double -0.000000e+00, %X
- ret double %Y
-}
-
-define <2 x double> @fneg_dp_vec(<2 x double> %X) {
- %Y = fsub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X
- ret <2 x double> %Y
-}
-
-define float @fneg_sp(float %X) {
- %Y = fsub float -0.000000e+00, %X
- ret float %Y
-}
-
-define <4 x float> @fneg_sp_vec(<4 x float> %X) {
- %Y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00, float -0.000000e+00>, %X
- ret <4 x float> %Y
-}
-
-declare double @fabs(double)
-
-declare float @fabsf(float)
-
-define double @fabs_dp(double %X) {
- %Y = call double @fabs( double %X ) readnone
- ret double %Y
-}
-
-define float @fabs_sp(float %X) {
- %Y = call float @fabsf( float %X ) readnone
- ret float %Y
-}
diff --git a/test/CodeGen/CellSPU/i64ops.ll b/test/CodeGen/CellSPU/i64ops.ll
deleted file mode 100644
index 3553cbbf7b..0000000000
--- a/test/CodeGen/CellSPU/i64ops.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep xswd %t1.s | count 3
-; RUN: grep xsbh %t1.s | count 1
-; RUN: grep xshw %t1.s | count 2
-; RUN: grep shufb %t1.s | count 7
-; RUN: grep cg %t1.s | count 4
-; RUN: grep addx %t1.s | count 4
-; RUN: grep fsmbi %t1.s | count 3
-; RUN: grep il %t1.s | count 2
-; RUN: grep mpy %t1.s | count 10
-; RUN: grep mpyh %t1.s | count 6
-; RUN: grep mpyhhu %t1.s | count 2
-; RUN: grep mpyu %t1.s | count 4
-
-; ModuleID = 'stores.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i64 @sext_i64_i8(i8 %a) nounwind {
- %1 = sext i8 %a to i64
- ret i64 %1
-}
-
-define i64 @sext_i64_i16(i16 %a) nounwind {
- %1 = sext i16 %a to i64
- ret i64 %1
-}
-
-define i64 @sext_i64_i32(i32 %a) nounwind {
- %1 = sext i32 %a to i64
- ret i64 %1
-}
-
-define i64 @zext_i64_i8(i8 %a) nounwind {
- %1 = zext i8 %a to i64
- ret i64 %1
-}
-
-define i64 @zext_i64_i16(i16 %a) nounwind {
- %1 = zext i16 %a to i64
- ret i64 %1
-}
-
-define i64 @zext_i64_i32(i32 %a) nounwind {
- %1 = zext i32 %a to i64
- ret i64 %1
-}
-
-define i64 @add_i64(i64 %a, i64 %b) nounwind {
- %1 = add i64 %a, %b
- ret i64 %1
-}
-
-define i64 @mul_i64(i64 %a, i64 %b) nounwind {
- %1 = mul i64 %a, %b
- ret i64 %1
-}
diff --git a/test/CodeGen/CellSPU/i8ops.ll b/test/CodeGen/CellSPU/i8ops.ll
deleted file mode 100644
index 57a2aa8947..0000000000
--- a/test/CodeGen/CellSPU/i8ops.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-
-; ModuleID = 'i8ops.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i8 @add_i8(i8 %a, i8 %b) nounwind {
- %1 = add i8 %a, %b
- ret i8 %1
-}
-
-define i8 @add_i8_imm(i8 %a, i8 %b) nounwind {
- %1 = add i8 %a, 15
- ret i8 %1
-}
-
-define i8 @sub_i8(i8 %a, i8 %b) nounwind {
- %1 = sub i8 %a, %b
- ret i8 %1
-}
-
-define i8 @sub_i8_imm(i8 %a, i8 %b) nounwind {
- %1 = sub i8 %a, 15
- ret i8 %1
-}
diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll
deleted file mode 100644
index 853ae1db16..0000000000
--- a/test/CodeGen/CellSPU/icmp16.ll
+++ /dev/null
@@ -1,574 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; For "positive" comparisons:
-; selb $3, $6, $5, <i1>
-; selb $3, $5, $4, <i1>
-;
-; For "negative" comparisons, i.e., those where the result of the comparison
-; must be inverted (setne, for example):
-; selb $3, $5, $6, <i1>
-; selb $3, $4, $5, <i1>
-
-; i16 integer comparisons:
-define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_select_i16:
-; CHECK: ceqh
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp eq i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_setcc_i16:
-; CHECK: ilhu
-; CHECK: ceqh
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp eq i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed01_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed02_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed03_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_eq_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_select_i16:
-; CHECK: ceqh
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ne i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_setcc_i16:
-; CHECK: ceqh
-; CHECK: ilhu
-; CHECK: xorhi
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ne i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed01_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed02_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed03_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ne_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_select_i16:
-; CHECK: clgth
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ugt i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_setcc_i16:
-; CHECK: ilhu
-; CHECK: clgth
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ugt i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed01_i16:
-; CHECK: clgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 500
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed02_i16:
-; CHECK: ceqhi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 0
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed03_i16:
-; CHECK: clgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 65024
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ugt_immed04_i16:
-; CHECK: ilh
-; CHECK: clgth
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_uge_select_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp uge i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_uge_setcc_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp uge i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp uge i16 %arg1, <immed> can always be transformed into
-;; icmp ugt i16 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_select_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ult i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_setcc_i16:
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ult i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed01_i16:
-; CHECK: ceqhi
-; CHECK: clgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed02_i16:
-; CHECK: ceqhi
-; CHECK: clgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 65534
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed03_i16:
-; CHECK: ceqhi
-; CHECK: clgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 65024
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ult_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: clgth
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i16 %arg1, 32769
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ule_select_i16:
-; CHECK: clgth
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ule i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_ule_setcc_i16:
-; CHECK: clgth
-; CHECK: ilhu
-; CHECK: xorhi
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ule i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp ule i16 %arg1, <immed> can always be transformed into
-;; icmp ult i16 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_select_i16:
-; CHECK: cgth
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sgt i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_setcc_i16:
-; CHECK: ilhu
-; CHECK: cgth
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sgt i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed01_i16:
-; CHECK: cgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed02_i16:
-; CHECK: cgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed03_i16:
-; CHECK: cgthi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sgt_immed04_i16:
-; CHECK: ilh
-; CHECK: ceqh
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp sgt i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sge_select_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sge i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sge_setcc_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sge i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sge i16 %arg1, <immed> can always be transformed into
-;; icmp sgt i16 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_select_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp slt i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_setcc_i16:
-; CHECK: ceqh
-; CHECK: cgth
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp slt i16 %arg1, %arg2
- ret i1 %A
-}
-
-define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed01_i16:
-; CHECK: ceqhi
-; CHECK: cgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i16 %arg1, 511
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed02_i16:
-; CHECK: ceqhi
-; CHECK: cgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i16 %arg1, -512
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed03_i16:
-; CHECK: ceqhi
-; CHECK: cgthi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i16 %arg1, -1
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_slt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_slt_immed04_i16:
-; CHECK: lr
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp slt i16 %arg1, 32768
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sle_select_i16:
-; CHECK: cgth
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp sle i16 %arg1, %arg2
- %B = select i1 %A, i16 %val1, i16 %val2
- ret i16 %B
-}
-
-define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind {
-; CHECK: icmp_sle_setcc_i16:
-; CHECK: cgth
-; CHECK: ilhu
-; CHECK: xorhi
-; CHECK: iohl
-; CHECK: bi
-
-entry:
- %A = icmp sle i16 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sle i16 %arg1, <immed> can always be transformed into
-;; icmp slt i16 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll
deleted file mode 100644
index 1794f4cd7b..0000000000
--- a/test/CodeGen/CellSPU/icmp32.ll
+++ /dev/null
@@ -1,575 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; For "positive" comparisons:
-; selb $3, $6, $5, <i1>
-; selb $3, $5, $4, <i1>
-;
-; For "negative" comparisons, i.e., those where the result of the comparison
-; must be inverted (setne, for example):
-; selb $3, $5, $6, <i1>
-; selb $3, $4, $5, <i1>
-
-; i32 integer comparisons:
-define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_select_i32:
-; CHECK: ceq
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp eq i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_setcc_i32:
-; CHECK: ilhu
-; CHECK: ceq
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp eq i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed01_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed02_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, -512
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed03_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, -1
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_eq_immed04_i32:
-; CHECK: ila
-; CHECK: ceq
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_select_i32:
-; CHECK: ceq
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ne i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_setcc_i32:
-; CHECK: ceq
-; CHECK: ilhu
-; CHECK: xori
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ne i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed01_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed02_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, -512
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed03_i32:
-; CHECK: ceqi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, -1
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ne_immed04_i32:
-; CHECK: ila
-; CHECK: ceq
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_select_i32:
-; CHECK: clgt
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ugt i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_setcc_i32:
-; CHECK: ilhu
-; CHECK: clgt
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ugt i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed01_i32:
-; CHECK: clgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed02_i32:
-; CHECK: clgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 4294966784
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed03_i32:
-; CHECK: clgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 4294967293
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ugt_immed04_i32:
-; CHECK: ila
-; CHECK: clgt
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_uge_select_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp uge i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_uge_setcc_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp uge i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp uge i32 %arg1, <immed> can always be transformed into
-;; icmp ugt i32 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_select_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ult i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_setcc_i32:
-; CHECK: ceq
-; CHECK: clgt
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ult i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed01_i32:
-; CHECK: ceqi
-; CHECK: clgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed02_i32:
-; CHECK: ceqi
-; CHECK: clgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 4294966784
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed03_i32:
-; CHECK: ceqi
-; CHECK: clgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 4294967293
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ult_immed04_i32:
-; CHECK: rotmi
-; CHECK: ceqi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ule_select_i32:
-; CHECK: clgt
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ule i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_ule_setcc_i32:
-; CHECK: clgt
-; CHECK: ilhu
-; CHECK: xori
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp ule i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp ule i32 %arg1, <immed> can always be transformed into
-;; icmp ult i32 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_select_i32:
-; CHECK: cgt
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sgt i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_setcc_i32:
-; CHECK: ilhu
-; CHECK: cgt
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sgt i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed01_i32:
-; CHECK: cgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed02_i32:
-; CHECK: cgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 4294966784
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed03_i32:
-; CHECK: cgti
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 4294967293
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sgt_immed04_i32:
-; CHECK: ila
-; CHECK: cgt
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sge_select_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sge i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sge_setcc_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: ilhu
-; CHECK: or
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sge i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sge i32 %arg1, <immed> can always be transformed into
-;; icmp sgt i32 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_select_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp slt i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_setcc_i32:
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: ilhu
-; CHECK: nor
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp slt i32 %arg1, %arg2
- ret i1 %A
-}
-
-define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed01_i32:
-; CHECK: ceqi
-; CHECK: cgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, 511
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed02_i32:
-; CHECK: ceqi
-; CHECK: cgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, -512
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed03_i32:
-; CHECK: ceqi
-; CHECK: cgti
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, -1
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_slt_immed04_i32:
-; CHECK: ila
-; CHECK: ceq
-; CHECK: cgt
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i32 %arg1, 32768
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sle_select_i32:
-; CHECK: cgt
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp sle i32 %arg1, %arg2
- %B = select i1 %A, i32 %val1, i32 %val2
- ret i32 %B
-}
-
-define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind {
-; CHECK: icmp_sle_setcc_i32:
-; CHECK: cgt
-; CHECK: ilhu
-; CHECK: xori
-; CHECK: iohl
-; CHECK: shufb
-
-entry:
- %A = icmp sle i32 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sle i32 %arg1, <immed> can always be transformed into
-;; icmp slt i32 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
deleted file mode 100644
index 9dd2cdc0de..0000000000
--- a/test/CodeGen/CellSPU/icmp64.ll
+++ /dev/null
@@ -1,146 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ceq %t1.s | count 20
-; RUN: grep cgti %t1.s | count 12
-; RUN: grep cgt %t1.s | count 16
-; RUN: grep clgt %t1.s | count 12
-; RUN: grep gb %t1.s | count 12
-; RUN: grep fsm %t1.s | count 10
-; RUN: grep xori %t1.s | count 5
-; RUN: grep selb %t1.s | count 18
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; i64 integer comparisons:
-define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp eq i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp eq i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ne i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ne i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ugt i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ugt i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp uge i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp uge i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ult i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ult i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ule i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp ule i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sgt i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sgt i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sge i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sge i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp slt i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp slt i64 %arg1, %arg2
- ret i1 %A
-}
-
-define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sle i64 %arg1, %arg2
- %B = select i1 %A, i64 %val1, i64 %val2
- ret i64 %B
-}
-
-define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
-entry:
- %A = icmp sle i64 %arg1, %arg2
- ret i1 %A
-}
diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll
deleted file mode 100644
index 1db641e5a8..0000000000
--- a/test/CodeGen/CellSPU/icmp8.ll
+++ /dev/null
@@ -1,446 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
-; $3 = %arg1, $4 = %val1, $5 = %val2
-;
-; For "positive" comparisons:
-; selb $3, $6, $5, <i1>
-; selb $3, $5, $4, <i1>
-;
-; For "negative" comparisons, i.e., those where the result of the comparison
-; must be inverted (setne, for example):
-; selb $3, $5, $6, <i1>
-; selb $3, $4, $5, <i1>
-
-; i8 integer comparisons:
-define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_select_i8:
-; CHECK: ceqb
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp eq i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_setcc_i8:
-; CHECK: ceqb
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp eq i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_immed01_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i8 %arg1, 127
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_immed02_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i8 %arg1, -128
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_eq_immed03_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp eq i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_select_i8:
-; CHECK: ceqb
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ne i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_setcc_i8:
-; CHECK: ceqb
-; CHECK: xorbi
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ne i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_immed01_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i8 %arg1, 127
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_immed02_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i8 %arg1, -128
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ne_immed03_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp ne i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ugt_select_i8:
-; CHECK: clgtb
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ugt i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ugt_setcc_i8:
-; CHECK: clgtb
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ugt i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ugt_immed01_i8:
-; CHECK: clgtbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ugt i8 %arg1, 126
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_uge_select_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp uge i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_uge_setcc_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: or
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp uge i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp uge i8 %arg1, <immed> can always be transformed into
-;; icmp ugt i8 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_select_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp ult i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_setcc_i8:
-; CHECK: ceqb
-; CHECK: clgtb
-; CHECK: nor
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ult i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_immed01_i8:
-; CHECK: ceqbi
-; CHECK: clgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i8 %arg1, 253
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ult_immed02_i8:
-; CHECK: ceqbi
-; CHECK: clgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp ult i8 %arg1, 129
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ule_select_i8:
-; CHECK: clgtb
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp ule i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_ule_setcc_i8:
-; CHECK: clgtb
-; CHECK: xorbi
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp ule i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp ule i8 %arg1, <immed> can always be transformed into
-;; icmp ult i8 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_select_i8:
-; CHECK: cgtb
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sgt i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_setcc_i8:
-; CHECK: cgtb
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp sgt i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_immed01_i8:
-; CHECK: cgtbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i8 %arg1, 96
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_immed02_i8:
-; CHECK: cgtbi
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp sgt i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sgt_immed03_i8:
-; CHECK: ceqbi
-; CHECK: selb $3, $4, $5, $3
-
-entry:
- %A = icmp sgt i8 %arg1, -128
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sge_select_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: or
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp sge i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sge_setcc_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: or
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp sge i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sge i8 %arg1, <immed> can always be transformed into
-;; icmp sgt i8 %arg1, <immed>-1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
-define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_select_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: nor
-; CHECK: selb $3, $6, $5, $3
-
-entry:
- %A = icmp slt i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_setcc_i8:
-; CHECK: ceqb
-; CHECK: cgtb
-; CHECK: nor
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp slt i8 %arg1, %arg2
- ret i1 %A
-}
-
-define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_immed01_i8:
-; CHECK: ceqbi
-; CHECK: cgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i8 %arg1, 96
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_immed02_i8:
-; CHECK: ceqbi
-; CHECK: cgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i8 %arg1, -120
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_slt_immed03_i8:
-; CHECK: ceqbi
-; CHECK: cgtbi
-; CHECK: nor
-; CHECK: selb $3, $5, $4, $3
-
-entry:
- %A = icmp slt i8 %arg1, -1
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sle_select_i8:
-; CHECK: cgtb
-; CHECK: selb $3, $5, $6, $3
-
-entry:
- %A = icmp sle i8 %arg1, %arg2
- %B = select i1 %A, i8 %val1, i8 %val2
- ret i8 %B
-}
-
-define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind {
-; CHECK: icmp_sle_setcc_i8:
-; CHECK: cgtb
-; CHECK: xorbi
-; CHECK-NEXT: bi
-
-entry:
- %A = icmp sle i8 %arg1, %arg2
- ret i1 %A
-}
-
-;; Note: icmp sle i8 %arg1, <immed> can always be transformed into
-;; icmp slt i8 %arg1, <immed>+1
-;;
-;; Consequently, even though the patterns exist to match, it's unlikely
-;; they'll ever be generated.
-
diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll
deleted file mode 100644
index 077d07169e..0000000000
--- a/test/CodeGen/CellSPU/immed16.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep "ilh" %t1.s | count 11
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i16 @test_1() {
- %x = alloca i16, align 16
- store i16 419, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_2() {
- %x = alloca i16, align 16
- store i16 1023, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_3() {
- %x = alloca i16, align 16
- store i16 -1023, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_4() {
- %x = alloca i16, align 16
- store i16 32767, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_5() {
- %x = alloca i16, align 16
- store i16 -32768, i16* %x ;; ILH via pattern
- ret i16 0
-}
-
-define i16 @test_6() {
- ret i16 0
-}
-
-
diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll
deleted file mode 100644
index 8e48f0b52c..0000000000
--- a/test/CodeGen/CellSPU/immed32.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ilhu %t1.s | count 9
-; RUN: grep iohl %t1.s | count 7
-; RUN: grep -w il %t1.s | count 3
-; RUN: grep 16429 %t1.s | count 1
-; RUN: grep 63572 %t1.s | count 1
-; RUN: grep 128 %t1.s | count 1
-; RUN: grep 32639 %t1.s | count 1
-; RUN: grep 65535 %t1.s | count 1
-; RUN: grep 16457 %t1.s | count 1
-; RUN: grep 4059 %t1.s | count 1
-; RUN: grep 49077 %t1.s | count 1
-; RUN: grep 1267 %t1.s | count 2
-; RUN: grep 16309 %t1.s | count 1
-; RUN: cat %t1.s | FileCheck %s
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define i32 @test_1() {
- ret i32 4784128 ;; ILHU via pattern (0x49000)
-}
-
-define i32 @test_2() {
- ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f)
-}
-
-define i32 @test_3() {
- ret i32 511 ;; IL via pattern
-}
-
-define i32 @test_4() {
- ret i32 -512 ;; IL via pattern
-}
-
-define i32 @test_5()
-{
-;CHECK: test_5:
-;CHECK-NOT: ila $3, 40000
-;CHECK: ilhu
-;CHECK: iohl
-;CHECK: bi $lr
- ret i32 400000
-}
-
-;; double float floatval
-;; 0x4005bf0a80000000 0x402d|f854 2.718282
-define float @float_const_1() {
- ret float 0x4005BF0A80000000 ;; ILHU/IOHL
-}
-
-;; double float floatval
-;; 0x3810000000000000 0x0080|0000 0.000000
-define float @float_const_2() {
- ret float 0x3810000000000000 ;; IL 128
-}
-
-;; double float floatval
-;; 0x47efffffe0000000 0x7f7f|ffff NaN
-define float @float_const_3() {
- ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern
-}
-
-;; double float floatval
-;; 0x400921fb60000000 0x4049|0fdb 3.141593
-define float @float_const_4() {
- ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern
-}
-
-;; double float floatval
-;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214
-define float @float_const_5() {
- ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern
-}
-
-;; double float floatval
-;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214
-define float @float_const_6() {
- ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern
-}
-
-define float @float_const_7() {
- ret float 0.000000e+00 ;; IL 0 via pattern
-}
diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll
deleted file mode 100644
index fd48365175..0000000000
--- a/test/CodeGen/CellSPU/immed64.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep lqa %t1.s | count 13
-; RUN: grep ilhu %t1.s | count 15
-; RUN: grep ila %t1.s | count 1
-; RUN: grep -w il %t1.s | count 6
-; RUN: grep shufb %t1.s | count 13
-; RUN: grep 65520 %t1.s | count 1
-; RUN: grep 43981 %t1.s | count 1
-; RUN: grep 13702 %t1.s | count 1
-; RUN: grep 28225 %t1.s | count 1
-; RUN: grep 30720 %t1.s | count 1
-; RUN: grep 3233857728 %t1.s | count 8
-; RUN: grep 2155905152 %t1.s | count 6
-; RUN: grep 66051 %t1.s | count 7
-; RUN: grep 471670303 %t1.s | count 11
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; 1311768467750121234 => 0x 12345678 abcdef12 (4660,22136/43981,61202)
-; 18446744073709551591 => 0x ffffffff ffffffe7 (-25)
-; 18446744073708516742 => 0x ffffffff fff03586 (-1034874)
-; 5308431 => 0x 00000000 0051000F
-; 9223372038704560128 => 0x 80000000 6e417800
-
-define i64 @i64_const_1() {
- ret i64 1311768467750121234 ;; Constant pool spill
-}
-
-define i64 @i64_const_2() {
- ret i64 18446744073709551591 ;; IL/SHUFB
-}
-
-define i64 @i64_const_3() {
- ret i64 18446744073708516742 ;; IHLU/IOHL/SHUFB
-}
-
-define i64 @i64_const_4() {
- ret i64 5308431 ;; ILHU/IOHL/SHUFB
-}
-
-define i64 @i64_const_5() {
- ret i64 511 ;; IL/SHUFB
-}
-
-define i64 @i64_const_6() {
- ret i64 -512 ;; IL/SHUFB
-}
-
-define i64 @i64_const_7() {
- ret i64 9223372038704560128 ;; IHLU/IOHL/SHUFB
-}
-
-define i64 @i64_const_8() {
- ret i64 0 ;; IL
-}
-
-define i64 @i64_const_9() {
- ret i64 -1 ;; IL
-}
-
-define i64 @i64_const_10() {
- ret i64 281470681808895 ;; IL 65535
-}
-
-; 0x4005bf0a8b145769 ->
-; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906])
-; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377])
-define double @f64_const_1() {
- ret double 0x4005bf0a8b145769 ;; ILHU/IOHL via pattern
-}
-
-define double @f64_const_2() {
- ret double 0x0010000000000000
-}
-
-define double @f64_const_3() {
- ret double 0x7fefffffffffffff
-}
-
-define double @f64_const_4() {
- ret double 0x400921fb54442d18
-}
-
-define double @f64_const_5() {
- ret double 0xbff6a09e667f3bcd ;; ILHU/IOHL via pattern
-}
-
-define double @f64_const_6() {
- ret double 0x3ff6a09e667f3bcd
-}
-
-define double @f64_const_7() {
- ret double 0.000000e+00
-}
diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll
deleted file mode 100644
index 984c017c96..0000000000
--- a/test/CodeGen/CellSPU/int2fp.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep csflt %t1.s | count 5
-; RUN: grep cuflt %t1.s | count 1
-; RUN: grep xshw %t1.s | count 2
-; RUN: grep xsbh %t1.s | count 1
-; RUN: grep and %t1.s | count 2
-; RUN: grep andi %t1.s | count 1
-; RUN: grep ila %t1.s | count 1
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define float @sitofp_i32(i32 %arg1) {
- %A = sitofp i32 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @uitofp_u32(i32 %arg1) {
- %A = uitofp i32 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @sitofp_i16(i16 %arg1) {
- %A = sitofp i16 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @uitofp_i16(i16 %arg1) {
- %A = uitofp i16 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @sitofp_i8(i8 %arg1) {
- %A = sitofp i8 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
-
-define float @uitofp_i8(i8 %arg1) {
- %A = uitofp i8 %arg1 to float ; <float> [#uses=1]
- ret float %A
-}
diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll
deleted file mode 100644
index b0f6a6247e..0000000000
--- a/test/CodeGen/CellSPU/intrinsics_branch.ll
+++ /dev/null
@@ -1,150 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep ceq %t1.s | count 30
-; RUN: grep ceqb %t1.s | count 10
-; RUN: grep ceqhi %t1.s | count 5
-; RUN: grep ceqi %t1.s | count 5
-; RUN: grep cgt %t1.s | count 30
-; RUN: grep cgtb %t1.s | count 10
-; RUN: grep cgthi %t1.s | count 5
-; RUN: grep cgti %t1.s | count 5
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
-
-declare <4 x i32> @llvm.spu.si.ceq(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.spu.si.ceqb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.spu.si.ceqh(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.spu.si.ceqi(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.ceqhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.ceqbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.cgt(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.spu.si.cgtb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.spu.si.cgth(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.spu.si.cgti(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.cgthi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.cgtbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.clgt(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.spu.si.clgtb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.spu.si.clgth(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.spu.si.clgti(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.clgthi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8)
-
-
-
-define <4 x i32> @test(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) {
- call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) {
- call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @ceqitest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @ceqhitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @ceqbitest(<16 x i8> %A) {
- call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) {
- call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) {
- call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @cgtitest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @cgthitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @cgtbitest(<16 x i8> %A) {
- call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) {
- call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) {
- call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
-
-define <4 x i32> @clgtitest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @clgthitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
-
-define <16 x i8> @clgtbitest(<16 x i8> %A) {
- call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65)
- %Y = bitcast <16 x i8> %1 to <16 x i8>
- ret <16 x i8> %Y
-}
diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll
deleted file mode 100644
index 81373470d0..0000000000
--- a/test/CodeGen/CellSPU/intrinsics_float.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep fa %t1.s | count 5
-; RUN: grep fs %t1.s | count 5
-; RUN: grep fm %t1.s | count 15
-; RUN: grep fceq %t1.s | count 5
-; RUN: grep fcmeq %t1.s | count 5
-; RUN: grep fcgt %t1.s | count 5
-; RUN: grep fcmgt %t1.s | count 5
-; RUN: grep fma %t1.s | count 5
-; RUN: grep fnms %t1.s | count 5
-; RUN: grep fms %t1.s | count 5
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare <4 x i32> @llvm.spu.si.shli(<4 x i32>, i8)
-
-declare <4 x float> @llvm.spu.si.fa(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fs(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fm(<4 x float>, <4 x float>)
-
-declare <4 x float> @llvm.spu.si.fceq(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fcmeq(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fcgt(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fcmgt(<4 x float>, <4 x float>)
-
-declare <4 x float> @llvm.spu.si.fma(<4 x float>, <4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>)
-declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>)
-
-define <4 x i32> @test(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fs(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) {
- call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
- call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
- call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
-
-define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
- call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C)
- %Y = bitcast <4 x float> %1 to <4 x float>
- ret <4 x float> %Y
-}
diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll
deleted file mode 100644
index a29ee4c240..0000000000
--- a/test/CodeGen/CellSPU/intrinsics_logical.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and %t1.s | count 20
-; RUN: grep andc %t1.s | count 5
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-declare <4 x i32> @llvm.spu.si.and(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.andc(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.andi(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.andhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.andbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.or(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.orc(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.ori(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.orhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.orbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.xor(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.xori(<4 x i32>, i16)
-declare <8 x i16> @llvm.spu.si.xorhi(<8 x i16>, i16)
-declare <16 x i8> @llvm.spu.si.xorbi(<16 x i8>, i8)
-
-declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) {
- call <4 x i32> @llvm.spu.si.andc(<4 x i32> %A, <4 x i32> %B)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <4 x i32> @anditest(<4 x i32> %A) {
- call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65)
- %Y = bitcast <4 x i32> %1 to <4 x i32>
- ret <4 x i32> %Y
-}
-
-define <8 x i16> @andhitest(<8 x i16> %A) {
- call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65)
- %Y = bitcast <8 x i16> %1 to <8 x i16>
- ret <8 x i16> %Y
-}
diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll
deleted file mode 100644
index 66c2fdeb51..0000000000
--- a/test/CodeGen/CellSPU/jumptable.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s
-; This is to check that emitting jumptables doesn't crash llc
-define i32 @test(i32 %param) {
-entry:
-;CHECK: ai {{\$.}}, $3, -1
-;CHECK: clgti {{\$., \$.}}, 3
-;CHECK: brnz {{\$.}},.LBB0_
- switch i32 %param, label %bb2 [
- i32 1, label %bb1
- i32 2, label %bb2
- i32 3, label %bb3
- i32 4, label %bb2
- ]
-;CHECK-NOT: # BB#2
-bb1:
- ret i32 1
-bb2:
- ret i32 2
-bb3:
- ret i32 %param
-}
diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll
deleted file mode 100644
index 4771752f5f..0000000000
--- a/test/CodeGen/CellSPU/loads.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-; ModuleID = 'loads.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
-entry:
- %tmp1 = load <4 x float>* %a
- ret <4 x float> %tmp1
-; CHECK: lqd $3, 0($3)
-}
-
-define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 1
- %tmp1 = load <4 x float>* %arrayidx
- ret <4 x float> %tmp1
-; CHECK: lqd $3, 16($3)
-}
-
-
-declare <4 x i32>* @getv4f32ptr()
-define <4 x i32> @func() {
- ;CHECK: brasl
- ; we need to have some instruction to move the result to safety.
- ; which instruction (lr, stqd...) depends on the regalloc
- ;CHECK: {{.*}}
- ;CHECK: brasl
- %rv1 = call <4 x i32>* @getv4f32ptr()
- %rv2 = call <4 x i32>* @getv4f32ptr()
- %rv3 = load <4 x i32>* %rv1
- ret <4 x i32> %rv3
-}
-
-define <4 x float> @load_undef(){
- ; CHECK: lqd $3, 0($3)
- %val = load <4 x float>* undef
- ret <4 x float> %val
-}
-
-;check that 'misaligned' loads that may span two memory chunks
-;have two loads. Don't check for the bitmanipulation, as that
-;might change with improved algorithms or scheduling
-define i32 @load_misaligned( i32* %ptr ){
-;CHECK: load_misaligned
-;CHECK: lqd
-;CHECK: lqd
-;CHECK: bi $lr
- %rv = load i32* %ptr, align 2
- ret i32 %rv
-}
-
-define <4 x i32> @load_null_vec( ) {
-;CHECK: lqa
-;CHECK: bi $lr
- %rv = load <4 x i32>* null
- ret <4 x i32> %rv
-}
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
deleted file mode 100644
index c04e69e3e1..0000000000
--- a/test/CodeGen/CellSPU/mul-with-overflow.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=cellspu
-
-declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
-define zeroext i1 @a(i16 %x) nounwind {
- %res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3)
- %obil = extractvalue {i16, i1} %res, 1
- ret i1 %obil
-}
-
-declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
-define zeroext i1 @b(i16 %x) nounwind {
- %res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3)
- %obil = extractvalue {i16, i1} %res, 1
- ret i1 %obil
-}
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll
deleted file mode 100644
index 1e28fc7a91..0000000000
--- a/test/CodeGen/CellSPU/mul_ops.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep mpy %t1.s | count 44
-; RUN: grep mpyu %t1.s | count 4
-; RUN: grep mpyh %t1.s | count 10
-; RUN: grep mpyhh %t1.s | count 2
-; RUN: grep rotma %t1.s | count 12
-; RUN: grep rotmahi %t1.s | count 4
-; RUN: grep and %t1.s | count 2
-; RUN: grep selb %t1.s | count 6
-; RUN: grep fsmbi %t1.s | count 4
-; RUN: grep shli %t1.s | count 4
-; RUN: grep shlhi %t1.s | count 4
-; RUN: grep ila %t1.s | count 2
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; 32-bit multiply instruction generation:
-define <4 x i32> @mpy_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
-entry:
- %A = mul <4 x i32> %arg1, %arg2
- ret <4 x i32> %A
-}
-
-define <4 x i32> @mpy_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
-entry:
- %A = mul <4 x i32> %arg2, %arg1
- ret <4 x i32> %A
-}
-
-define <8 x i16> @mpy_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
-entry:
- %A = mul <8 x i16> %arg1, %arg2
- ret <8 x i16> %A
-}
-
-define <8 x i16> @mpy_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
-entry:
- %A = mul <8 x i16> %arg2, %arg1
- ret <8 x i16> %A
-}
-
-define <16 x i8> @mul_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
-entry:
- %A = mul <16 x i8> %arg2, %arg1
- ret <16 x i8> %A
-}
-
-define <16 x i8> @mul_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
-entry:
- %A = mul <16 x i8> %arg1, %arg2
- ret <16 x i8> %A
-}
-
-define i32 @mul_i32_1(i32 %arg1, i32 %arg2) {
-entry:
- %A = mul i32 %arg2, %arg1
- ret i32 %A
-}
-
-define i32 @mul_i32_2(i32 %arg1, i32 %arg2) {
-entry:
- %A = mul i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i16 @mul_i16_1(i16 %arg1, i16 %arg2) {
-entry:
- %A = mul i16 %arg2, %arg1
- ret i16 %A
-}
-
-define i16 @mul_i16_2(i16 %arg1, i16 %arg2) {
-entry:
- %A = mul i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i8 @mul_i8_1(i8 %arg1, i8 %arg2) {
-entry:
- %A = mul i8 %arg2, %arg1
- ret i8 %A
-}
-
-define i8 @mul_i8_2(i8 %arg1, i8 %arg2) {
-entry:
- %A = mul i8 %arg1, %arg2
- ret i8 %A
-}
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
deleted file mode 100644
index 57ac709c54..0000000000
--- a/test/CodeGen/CellSPU/nand.ll
+++ /dev/null
@@ -1,125 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep nand %t1.s | count 90
-; RUN: grep and %t1.s | count 94
-; RUN: grep xsbh %t1.s | count 2
-; RUN: grep xshw %t1.s | count 4
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define <4 x i32> @nand_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg2, %arg1 ; <<4 x i32>> [#uses=1]
- %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %B
-}
-
-define <4 x i32> @nand_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1]
- %B = xor <4 x i32> %A, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %B
-}
-
-define <8 x i16> @nand_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1]
- %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %B
-}
-
-define <8 x i16> @nand_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = and <8 x i16> %arg1, %arg2 ; <<8 x i16>> [#uses=1]
- %B = xor <8 x i16> %A, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %B
-}
-
-define <16 x i8> @nand_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1]
- %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- ret <16 x i8> %B
-}
-
-define <16 x i8> @nand_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = and <16 x i8> %arg1, %arg2 ; <<16 x i8>> [#uses=1]
- %B = xor <16 x i8> %A, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- ret <16 x i8> %B
-}
-
-define i32 @nand_i32_1(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg2, %arg1 ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
-}
-
-define i32 @nand_i32_2(i32 %arg1, i32 %arg2) {
- %A = and i32 %arg1, %arg2 ; <i32> [#uses=1]
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
-}
-
-define signext i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) {
- %A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define signext i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) {
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define zeroext i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define zeroext i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
- %B = xor i16 %A, -1 ; <i16> [#uses=1]
- ret i16 %B
-}
-
-define zeroext i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) {
- %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define zeroext i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define signext i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) {
- %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define signext i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define i8 @nand_i8_3(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
-
-define i8 @nand_i8_4(i8 %arg1, i8 %arg2) {
- %A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
- %B = xor i8 %A, -1 ; <i8> [#uses=1]
- ret i8 %B
-}
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
deleted file mode 100644
index f329266a3c..0000000000
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ /dev/null
@@ -1,278 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and %t1.s | count 2
-; RUN: grep orc %t1.s | count 85
-; RUN: grep ori %t1.s | count 34
-; RUN: grep orhi %t1.s | count 30
-; RUN: grep orbi %t1.s | count 15
-; RUN: FileCheck %s < %t1.s
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; OR instruction generation:
-define <4 x i32> @or_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = or <4 x i32> %arg1, %arg2
- ret <4 x i32> %A
-}
-
-define <4 x i32> @or_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = or <4 x i32> %arg2, %arg1
- ret <4 x i32> %A
-}
-
-define <8 x i16> @or_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = or <8 x i16> %arg1, %arg2
- ret <8 x i16> %A
-}
-
-define <8 x i16> @or_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = or <8 x i16> %arg2, %arg1
- ret <8 x i16> %A
-}
-
-define <16 x i8> @or_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = or <16 x i8> %arg2, %arg1
- ret <16 x i8> %A
-}
-
-define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = or <16 x i8> %arg1, %arg2
- ret <16 x i8> %A
-}
-
-define i32 @or_i32_1(i32 %arg1, i32 %arg2) {
- %A = or i32 %arg2, %arg1
- ret i32 %A
-}
-
-define i32 @or_i32_2(i32 %arg1, i32 %arg2) {
- %A = or i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i16 @or_i16_1(i16 %arg1, i16 %arg2) {
- %A = or i16 %arg2, %arg1
- ret i16 %A
-}
-
-define i16 @or_i16_2(i16 %arg1, i16 %arg2) {
- %A = or i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i8 @or_i8_1(i8 %arg1, i8 %arg2) {
- %A = or i8 %arg2, %arg1
- ret i8 %A
-}
-
-define i8 @or_i8_2(i8 %arg1, i8 %arg2) {
- %A = or i8 %arg1, %arg2
- ret i8 %A
-}
-
-; ORC instruction generation:
-define <4 x i32> @orc_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = or <4 x i32> %arg1, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @orc_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = or <4 x i32> %arg2, %A
- ret <4 x i32> %B
-}
-
-define <4 x i32> @orc_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
- %A = xor <4 x i32> %arg1, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = or <4 x i32> %A, %arg2
- ret <4 x i32> %B
-}
-
-define <8 x i16> @orc_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = or <8 x i16> %arg1, %A
- ret <8 x i16> %B
-}
-
-define <8 x i16> @orc_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2) {
- %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = or <8 x i16> %arg2, %A
- ret <8 x i16> %B
-}
-
-define <16 x i8> @orc_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = or <16 x i8> %arg2, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @orc_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = or <16 x i8> %arg1, %A
- ret <16 x i8> %B
-}
-
-define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) {
- %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = or <16 x i8> %A, %arg1
- ret <16 x i8> %B
-}
-
-define i32 @orc_i32_1(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = or i32 %A, %arg1
- ret i32 %B
-}
-
-define i32 @orc_i32_2(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg1, -1
- %B = or i32 %A, %arg2
- ret i32 %B
-}
-
-define i32 @orc_i32_3(i32 %arg1, i32 %arg2) {
- %A = xor i32 %arg2, -1
- %B = or i32 %arg1, %A
- ret i32 %B
-}
-
-define i16 @orc_i16_1(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = or i16 %A, %arg1
- ret i16 %B
-}
-
-define i16 @orc_i16_2(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg1, -1
- %B = or i16 %A, %arg2
- ret i16 %B
-}
-
-define i16 @orc_i16_3(i16 %arg1, i16 %arg2) {
- %A = xor i16 %arg2, -1
- %B = or i16 %arg1, %A
- ret i16 %B
-}
-
-define i8 @orc_i8_1(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = or i8 %A, %arg1
- ret i8 %B
-}
-
-define i8 @orc_i8_2(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg1, -1
- %B = or i8 %A, %arg2
- ret i8 %B
-}
-
-define i8 @orc_i8_3(i8 %arg1, i8 %arg2) {
- %A = xor i8 %arg2, -1
- %B = or i8 %arg1, %A
- ret i8 %B
-}
-
-; ORI instruction generation (i32 data type):
-define <4 x i32> @ori_v4i32_1(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 511, i32 511, i32 511, i32 511 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @ori_v4i32_2(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 510, i32 510, i32 510, i32 510 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @ori_v4i32_3(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 -1, i32 -1, i32 -1, i32 -1 >
- ret <4 x i32> %tmp2
-}
-
-define <4 x i32> @ori_v4i32_4(<4 x i32> %in) {
- %tmp2 = or <4 x i32> %in, < i32 -512, i32 -512, i32 -512, i32 -512 >
- ret <4 x i32> %tmp2
-}
-
-define zeroext i32 @ori_u32(i32 zeroext %in) {
- %tmp37 = or i32 %in, 37 ; <i32> [#uses=1]
- ret i32 %tmp37
-}
-
-define signext i32 @ori_i32(i32 signext %in) {
- %tmp38 = or i32 %in, 37 ; <i32> [#uses=1]
- ret i32 %tmp38
-}
-
-define i32 @ori_i32_600(i32 %in) {
- ;600 does not fit into 'ori' immediate field
- ;CHECK: ori_i32_600
- ;CHECK: il
- ;CHECK: ori
- %tmp = or i32 %in, 600
- ret i32 %tmp
-}
-
-; ORHI instruction generation (i16 data type):
-define <8 x i16> @orhi_v8i16_1(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 511, i16 511, i16 511, i16 511,
- i16 511, i16 511, i16 511, i16 511 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @orhi_v8i16_2(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 510, i16 510, i16 510, i16 510,
- i16 510, i16 510, i16 510, i16 510 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @orhi_v8i16_3(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1 >
- ret <8 x i16> %tmp2
-}
-
-define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) {
- %tmp2 = or <8 x i16> %in, < i16 -512, i16 -512, i16 -512, i16 -512,
- i16 -512, i16 -512, i16 -512, i16 -512 >
- ret <8 x i16> %tmp2
-}
-
-define zeroext i16 @orhi_u16(i16 zeroext %in) {
- %tmp37 = or i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp37
-}
-
-define signext i16 @orhi_i16(i16 signext %in) {
- %tmp38 = or i16 %in, 37 ; <i16> [#uses=1]
- ret i16 %tmp38
-}
-
-; ORBI instruction generation (i8 data type):
-define <16 x i8> @orbi_v16i8(<16 x i8> %in) {
- %tmp2 = or <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42, i8 42, i8 42,
- i8 42, i8 42, i8 42, i8 42 >
- ret <16 x i8> %tmp2
-}
-
-define zeroext i8 @orbi_u8(i8 zeroext %in) {
- %tmp37 = or i8 %in, 37 ; <i8> [#uses=1]
- ret i8 %tmp37
-}
-
-define signext i8 @orbi_i8(i8 signext %in) {
- %tmp38 = or i8 %in, 37 ; <i8> [#uses=1]
- ret i8 %tmp38
-}
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
deleted file mode 100644
index 1d933adac9..0000000000
--- a/test/CodeGen/CellSPU/private.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; Test to make sure that the 'private' is used correctly.
-;
-; RUN: llc < %s -march=cellspu > %t
-; RUN: grep .Lfoo: %t
-; RUN: grep brsl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep ila.*\.Lbaz %t
-
-define private void @foo() {
- ret void
-}
-
-@baz = private global i32 4
-
-define i32 @bar() {
- call void @foo()
- %1 = load i32* @baz, align 4
- ret i32 %1
-}
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
deleted file mode 100644
index 9770935276..0000000000
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ /dev/null
@@ -1,172 +0,0 @@
-; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep rot %t1.s | count 86
-; RUN: grep roth %t1.s | count 8
-; RUN: grep roti.*5 %t1.s | count 1
-; RUN: grep roti.*27 %t1.s | count 1
-; RUN: grep rothi.*5 %t1.s | count 2
-; RUN: grep rothi.*11 %t1.s | count 1
-; RUN: grep rothi.*,.3 %t1.s | count 1
-; RUN: grep andhi %t1.s | count 4
-; RUN: grep shlhi %t1.s | count 4
-; RUN: cat %t1.s | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; Vector rotates are not currently supported in gcc or llvm assembly. These are
-; not tested.
-
-; 32-bit rotates:
-define i32 @rotl32_1a(i32 %arg1, i8 %arg2) {
- %tmp1 = zext i8 %arg2 to i32 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %arg22 = sub i8 32, %arg2 ; <i8> [#uses=1]
- %tmp2 = zext i8 %arg22 to i32 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_1b(i32 %arg1, i16 %arg2) {
- %tmp1 = zext i16 %arg2 to i32 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %arg22 = sub i16 32, %arg2 ; <i8> [#uses=1]
- %tmp2 = zext i16 %arg22 to i32 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_2(i32 %arg1, i32 %arg2) {
- %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
- %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_3(i32 %arg1, i32 %arg2) {
- %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotl32_4(i32 %arg1, i32 %arg2) {
- %tmp1 = sub i32 32, %arg2 ; <i32> [#uses=1]
- %C = lshr i32 %arg1, %tmp1 ; <i32> [#uses=1]
- %B = shl i32 %arg1, %arg2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotr32_1(i32 %A, i8 %Amt) {
- %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
- %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1]
- %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
- %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
- %C = shl i32 %A, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-define i32 @rotr32_2(i32 %A, i8 %Amt) {
- %Amt2 = sub i8 32, %Amt ; <i8> [#uses=1]
- %tmp1 = zext i8 %Amt to i32 ; <i32> [#uses=1]
- %B = lshr i32 %A, %tmp1 ; <i32> [#uses=1]
- %tmp2 = zext i8 %Amt2 to i32 ; <i32> [#uses=1]
- %C = shl i32 %A, %tmp2 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-; Rotate left with immediate
-define i32 @rotli32(i32 %A) {
- %B = shl i32 %A, 5 ; <i32> [#uses=1]
- %C = lshr i32 %A, 27 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-; Rotate right with immediate
-define i32 @rotri32(i32 %A) {
- %B = lshr i32 %A, 5 ; <i32> [#uses=1]
- %C = shl i32 %A, 27 ; <i32> [#uses=1]
- %D = or i32 %B, %C ; <i32> [#uses=1]
- ret i32 %D
-}
-
-; 16-bit rotates:
-define i16 @rotr16_1(i16 %arg1, i8 %arg) {
- %tmp1 = zext i8 %arg to i16 ; <i16> [#uses=1]
- %B = lshr i16 %arg1, %tmp1 ; <i16> [#uses=1]
- %arg2 = sub i8 16, %arg ; <i8> [#uses=1]
- %tmp2 = zext i8 %arg2 to i16 ; <i16> [#uses=1]
- %C = shl i16 %arg1, %tmp2 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i16 @rotr16_2(i16 %arg1, i16 %arg) {
- %B = lshr i16 %arg1, %arg ; <i16> [#uses=1]
- %tmp1 = sub i16 16, %arg ; <i16> [#uses=1]
- %C = shl i16 %arg1, %tmp1 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i16 @rotli16(i16 %A) {
- %B = shl i16 %A, 5 ; <i16> [#uses=1]
- %C = lshr i16 %A, 11 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i16 @rotri16(i16 %A) {
- %B = lshr i16 %A, 5 ; <i16> [#uses=1]
- %C = shl i16 %A, 11 ; <i16> [#uses=1]
- %D = or i16 %B, %C ; <i16> [#uses=1]
- ret i16 %D
-}
-
-define i8 @rotl8(i8 %A, i8 %Amt) {
- %B = shl i8 %A, %Amt ; <i8> [#uses=1]
- %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1]
- %C = lshr i8 %A, %Amt2 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i8 @rotr8(i8 %A, i8 %Amt) {
- %B = lshr i8 %A, %Amt ; <i8> [#uses=1]
- %Amt2 = sub i8 8, %Amt ; <i8> [#uses=1]
- %C = shl i8 %A, %Amt2 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i8 @rotli8(i8 %A) {
- %B = shl i8 %A, 5 ; <i8> [#uses=1]
- %C = lshr i8 %A, 3 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i8 @rotri8(i8 %A) {
- %B = lshr i8 %A, 5 ; <i8> [#uses=1]
- %C = shl i8 %A, 3 ; <i8> [#uses=1]
- %D = or i8 %B, %C ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define <2 x float> @test1(<4 x float> %param )
-{
-; CHECK: test1
-; CHECK: shufb
- %el = extractelement <4 x float> %param, i32 1
- %vec1 = insertelement <1 x float> undef, float %el, i32 0
- %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
-; CHECK: bi $lr
- ret <2 x float> %rv
-}
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
deleted file mode 100644
index 65e0aa6fa0..0000000000
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ /dev/null
@@ -1,572 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep selb %t1.s | count 56
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v2i64
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rC, %rB
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rB, %rC
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %C = and <2 x i64> %rB, %rC
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %A, %rA
- %C = and <2 x i64> %rC, %rB
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rC, %rB
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %C = and <2 x i64> %rB, %rC
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %C = and <2 x i64> %rB, %rC
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
- %A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
- %B = and <2 x i64> %rA, %A
- %C = and <2 x i64> %rC, %rB
- %D = or <2 x i64> %C, %B
- ret <2 x i64> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v4i32
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rC, %rB
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %rA
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rB, %rC
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %rA
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
- %B = and <4 x i32> %A, %rA
- %C = and <4 x i32> %rB, %rC
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %A, %rA
- %C = and <4 x i32> %rC, %rB
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rC, %rB
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %C = and <4 x i32> %rB, %rC
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %C = and <4 x i32> %rB, %rC
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
- %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
- %B = and <4 x i32> %rA, %A
- %C = and <4 x i32> %rC, %rB
- %D = or <4 x i32> %C, %B
- ret <4 x i32> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v8i16
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rC, %rB
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rB, %rC
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %C = and <8 x i16> %rB, %rC
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %A, %rA
- %C = and <8 x i16> %rC, %rB
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rC, %rB
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %C = and <8 x i16> %rB, %rC
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %C = and <8 x i16> %rB, %rC
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
- %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1 >
- %B = and <8 x i16> %rA, %A
- %C = and <8 x i16> %rC, %rB
- %D = or <8 x i16> %C, %B
- ret <8 x i16> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; v16i8
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rC, %rB
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rB, %rC
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %C = and <16 x i8> %rB, %rC
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %A, %rA
- %C = and <16 x i8> %rC, %rB
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rC, %rB
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %C = and <16 x i8> %rB, %rC
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %C = and <16 x i8> %rB, %rC
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
- %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1 >
- %B = and <16 x i8> %rA, %A
- %C = and <16 x i8> %rC, %rB
- %D = or <16 x i8> %C, %B
- ret <16 x i8> %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; i32
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rC, %rB
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rB, %rC
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %C = and i32 %rB, %rC
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %A, %rA
- %C = and i32 %rC, %rB
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rC, %rB
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) {
- %C = and i32 %rB, %rC
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %C = and i32 %rB, %rC
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) {
- %A = xor i32 %rC, -1
- %B = and i32 %rA, %A
- %C = and i32 %rC, %rB
- %D = or i32 %C, %B
- ret i32 %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; i16
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rC, %rB
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rB, %rC
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %C = and i16 %rB, %rC
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %A, %rA
- %C = and i16 %rC, %rB
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rC, %rB
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) {
- %C = and i16 %rB, %rC
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %C = and i16 %rB, %rC
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) {
- %A = xor i16 %rC, -1
- %B = and i16 %rA, %A
- %C = and i16 %rC, %rB
- %D = or i16 %C, %B
- ret i16 %D
-}
-
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-; i8
-;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-
-; (or (and rC, rB), (and (not rC), rA))
-define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rC, %rB
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rB, rC), (and (not rC), rA))
-define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rB, %rC
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and (not rC), rA), (and rB, rC))
-define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %C = and i8 %rB, %rC
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and (not rC), rA), (and rC, rB))
-define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %A, %rA
- %C = and i8 %rC, %rB
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rC, rB), (and rA, (not rC)))
-define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rC, %rB
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rB, rC), (and rA, (not rC)))
-define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) {
- %C = and i8 %rB, %rC
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rA, (not rC)), (and rB, rC))
-define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %C = and i8 %rB, %rC
- %D = or i8 %C, %B
- ret i8 %D
-}
-
-; (or (and rA, (not rC)), (and rC, rB))
-define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) {
- %A = xor i8 %rC, -1
- %B = and i8 %rA, %A
- %C = and i8 %rC, %rB
- %D = or i8 %C, %B
- ret i8 %D
-}
diff --git a/test/CodeGen/CellSPU/sext128.ll b/test/CodeGen/CellSPU/sext128.ll
deleted file mode 100644
index 6ae9aa5120..0000000000
--- a/test/CodeGen/CellSPU/sext128.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-; ModuleID = 'sext128.bc'
-target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:128:128-v128:128:128-a0:0:128-s0:128:128"
-target triple = "spu"
-
-define i128 @sext_i64_i128(i64 %a) {
-entry:
- %0 = sext i64 %a to i128
- ret i128 %0
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 66051
-; CHECK: long 67438087
-; CHECK-NOT: rotqmbyi
-; CHECK: lqa
-; CHECK: rotmai
-; CHECK: shufb
-}
-
-define i128 @sext_i32_i128(i32 %a) {
-entry:
- %0 = sext i32 %a to i128
- ret i128 %0
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 66051
-; CHECK-NOT: rotqmbyi
-; CHECK: lqa
-; CHECK: rotmai
-; CHECK: shufb
-}
-
-define i128 @sext_i32_i128a(float %a) {
-entry:
- %0 = call i32 @myfunc(float %a)
- %1 = sext i32 %0 to i128
- ret i128 %1
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 269488144
-; CHECK: long 66051
-; CHECK-NOT: rotqmbyi
-; CHECK: lqa
-; CHECK: rotmai
-; CHECK: shufb
-}
-
-declare i32 @myfunc(float)
-
-define i128 @func1(i8 %u) {
-entry:
-; CHECK: xsbh
-; CHECK: xshw
-; CHECK: rotmai
-; CHECK: shufb
-; CHECK: bi $lr
- %0 = sext i8 %u to i128
- ret i128 %0
-}
-
-define i128 @func2(i16 %u) {
-entry:
-; CHECK: xshw
-; CHECK: rotmai
-; CHECK: shufb
-; CHECK: bi $lr
- %0 = sext i16 %u to i128
- ret i128 %0
-}
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
deleted file mode 100644
index 1ccc356dcf..0000000000
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ /dev/null
@@ -1,348 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep "shlh " %t1.s | count 10
-; RUN: grep "shlhi " %t1.s | count 3
-; RUN: grep "shl " %t1.s | count 10
-; RUN: grep "shli " %t1.s | count 3
-; RUN: grep "xshw " %t1.s | count 5
-; RUN: grep "and " %t1.s | count 15
-; RUN: grep "andi " %t1.s | count 4
-; RUN: grep "rotmi " %t1.s | count 4
-; RUN: grep "rotqmbyi " %t1.s | count 1
-; RUN: grep "rotqmbii " %t1.s | count 2
-; RUN: grep "rotqmby " %t1.s | count 1
-; RUN: grep "rotqmbi " %t1.s | count 2
-; RUN: grep "rotqbyi " %t1.s | count 1
-; RUN: grep "rotqbii " %t1.s | count 2
-; RUN: grep "rotqbybi " %t1.s | count 1
-; RUN: grep "sfi " %t1.s | count 6
-; RUN: cat %t1.s | FileCheck %s
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; Shift left i16 via register, note that the second operand to shl is promoted
-; to a 32-bit type:
-
-define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
- %A = shl i16 %arg1, %arg2
- ret i16 %A
-}
-
-define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) {
- %A = shl i16 %arg2, %arg1
- ret i16 %A
-}
-
-define signext i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) {
- %A = shl i16 %arg1, %arg2
- ret i16 %A
-}
-
-define signext i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) {
- %A = shl i16 %arg2, %arg1
- ret i16 %A
-}
-
-define zeroext i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = shl i16 %arg1, %arg2
- ret i16 %A
-}
-
-define zeroext i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) {
- %A = shl i16 %arg2, %arg1
- ret i16 %A
-}
-
-; Shift left i16 with immediate:
-define i16 @shlhi_i16_1(i16 %arg1) {
- %A = shl i16 %arg1, 12
- ret i16 %A
-}
-
-; Should not generate anything other than the return, arg1 << 0 = arg1
-define i16 @shlhi_i16_2(i16 %arg1) {
- %A = shl i16 %arg1, 0
- ret i16 %A
-}
-
-define i16 @shlhi_i16_3(i16 %arg1) {
- %A = shl i16 16383, %arg1
- ret i16 %A
-}
-
-; Should generate 0, 0 << arg1 = 0
-define i16 @shlhi_i16_4(i16 %arg1) {
- %A = shl i16 0, %arg1
- ret i16 %A
-}
-
-define signext i16 @shlhi_i16_5(i16 signext %arg1) {
- %A = shl i16 %arg1, 12
- ret i16 %A
-}
-
-; Should not generate anything other than the return, arg1 << 0 = arg1
-define signext i16 @shlhi_i16_6(i16 signext %arg1) {
- %A = shl i16 %arg1, 0
- ret i16 %A
-}
-
-define signext i16 @shlhi_i16_7(i16 signext %arg1) {
- %A = shl i16 16383, %arg1
- ret i16 %A
-}
-
-; Should generate 0, 0 << arg1 = 0
-define signext i16 @shlhi_i16_8(i16 signext %arg1) {
- %A = shl i16 0, %arg1
- ret i16 %A
-}
-
-define zeroext i16 @shlhi_i16_9(i16 zeroext %arg1) {
- %A = shl i16 %arg1, 12
- ret i16 %A
-}
-
-; Should not generate anything other than the return, arg1 << 0 = arg1
-define zeroext i16 @shlhi_i16_10(i16 zeroext %arg1) {
- %A = shl i16 %arg1, 0
- ret i16 %A
-}
-
-define zeroext i16 @shlhi_i16_11(i16 zeroext %arg1) {
- %A = shl i16 16383, %arg1
- ret i16 %A
-}
-
-; Should generate 0, 0 << arg1 = 0
-define zeroext i16 @shlhi_i16_12(i16 zeroext %arg1) {
- %A = shl i16 0, %arg1
- ret i16 %A
-}
-
-; Shift left i32 via register, note that the second operand to shl is promoted
-; to a 32-bit type:
-
-define i32 @shl_i32_1(i32 %arg1, i32 %arg2) {
- %A = shl i32 %arg1, %arg2
- ret i32 %A
-}
-
-define i32 @shl_i32_2(i32 %arg1, i32 %arg2) {
- %A = shl i32 %arg2, %arg1
- ret i32 %A
-}
-
-define signext i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) {
- %A = shl i32 %arg1, %arg2
- ret i32 %A
-}
-
-define signext i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) {
- %A = shl i32 %arg2, %arg1
- ret i32 %A
-}
-
-define zeroext i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) {
- %A = shl i32 %arg1, %arg2
- ret i32 %A
-}
-
-define zeroext i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) {
- %A = shl i32 %arg2, %arg1
- ret i32 %A
-}
-
-; Shift left i32 with immediate:
-define i32 @shli_i32_1(i32 %arg1) {
- %A = shl i32 %arg1, 12
- ret i32 %A
-}
-
-; Should not generate anything other than the return, arg1 << 0 = arg1
-define i32 @shli_i32_2(i32 %arg1) {
- %A = shl i32 %arg1, 0
- ret i32 %A
-}
-
-define i32 @shli_i32_3(i32 %arg1) {
- %A = shl i32 16383, %arg1
- ret i32 %A
-}
-
-; Should generate 0, 0 << arg1 = 0
-define i32 @shli_i32_4(i32 %arg1) {
- %A = shl i32 0, %arg1
- ret i32 %A
-}
-
-define signext i32 @shli_i32_5(i32 signext %arg1) {
- %A = shl i32 %arg1, 12
- ret i32 %A
-}
-
-; Should not generate anything other than the return, arg1 << 0 = arg1
-define signext i32 @shli_i32_6(i32 signext %arg1) {
- %A = shl i32 %arg1, 0
- ret i32 %A
-}
-
-define signext i32 @shli_i32_7(i32 signext %arg1) {
- %A = shl i32 16383, %arg1
- ret i32 %A
-}
-
-; Should generate 0, 0 << arg1 = 0
-define signext i32 @shli_i32_8(i32 signext %arg1) {
- %A = shl i32 0, %arg1
- ret i32 %A
-}
-
-define zeroext i32 @shli_i32_9(i32 zeroext %arg1) {
- %A = shl i32 %arg1, 12
- ret i32 %A
-}
-
-; Should not generate anything other than the return, arg1 << 0 = arg1
-define zeroext i32 @shli_i32_10(i32 zeroext %arg1) {
- %A = shl i32 %arg1, 0
- ret i32 %A
-}
-
-define zeroext i32 @shli_i32_11(i32 zeroext %arg1) {
- %A = shl i32 16383, %arg1
- ret i32 %A
-}
-
-; Should generate 0, 0 << arg1 = 0
-define zeroext i32 @shli_i32_12(i32 zeroext %arg1) {
- %A = shl i32 0, %arg1
- ret i32 %A
-}
-
-;; i64 shift left
-
-define i64 @shl_i64_1(i64 %arg1) {
- %A = shl i64 %arg1, 9
- ret i64 %A
-}
-
-define i64 @shl_i64_2(i64 %arg1) {
- %A = shl i64 %arg1, 3
- ret i64 %A
-}
-
-define i64 @shl_i64_3(i64 %arg1, i32 %shift) {
- %1 = zext i32 %shift to i64
- %2 = shl i64 %arg1, %1
- ret i64 %2
-}
-
-;; i64 shift right logical (shift 0s from the right)
-
-define i64 @lshr_i64_1(i64 %arg1) {
- %1 = lshr i64 %arg1, 9
- ret i64 %1
-}
-
-define i64 @lshr_i64_2(i64 %arg1) {
- %1 = lshr i64 %arg1, 3
- ret i64 %1
-}
-
-define i64 @lshr_i64_3(i64 %arg1, i32 %shift) {
- %1 = zext i32 %shift to i64
- %2 = lshr i64 %arg1, %1
- ret i64 %2
-}
-
-;; i64 shift right arithmetic (shift 1s from the right)
-
-define i64 @ashr_i64_1(i64 %arg) {
- %1 = ashr i64 %arg, 9
- ret i64 %1
-}
-
-define i64 @ashr_i64_2(i64 %arg) {
- %1 = ashr i64 %arg, 3
- ret i64 %1
-}
-
-define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
- %1 = zext i32 %shift to i64
- %2 = ashr i64 %arg1, %1
- ret i64 %2
-}
-
-define i32 @hi32_i64(i64 %arg) {
- %1 = lshr i64 %arg, 32
- %2 = trunc i64 %1 to i32
- ret i32 %2
-}
-
-; some random tests
-define i128 @test_lshr_i128( i128 %val ) {
- ;CHECK: test_lshr_i128
- ;CHECK: sfi
- ;CHECK: rotqmbi
- ;CHECK: rotqmbybi
- ;CHECK: bi $lr
- %rv = lshr i128 %val, 64
- ret i128 %rv
-}
-
-;Vector shifts
-define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) {
-;CHECK: shl
-;CHECK: bi $lr
- %rv = shl <2 x i32> %val, %sh
- ret <2 x i32> %rv
-}
-
-define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) {
-;CHECK: shl
-;CHECK: bi $lr
- %rv = shl <4 x i32> %val, %sh
- ret <4 x i32> %rv
-}
-
-define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) {
-;CHECK: shlh
-;CHECK: bi $lr
- %rv = shl <8 x i16> %val, %sh
- ret <8 x i16> %rv
-}
-
-define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) {
-;CHECK: rotm
-;CHECK: bi $lr
- %rv = lshr <4 x i32> %val, %sh
- ret <4 x i32> %rv
-}
-
-define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
-;CHECK: sfhi
-;CHECK: rothm
-;CHECK: bi $lr
- %rv = lshr <8 x i16> %val, %sh
- ret <8 x i16> %rv
-}
-
-define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) {
-;CHECK: rotma
-;CHECK: bi $lr
- %rv = ashr <4 x i32> %val, %sh
- ret <4 x i32> %rv
-}
-
-define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
-;CHECK: sfhi
-;CHECK: rotmah
-;CHECK: bi $lr
- %rv = ashr <8 x i16> %val, %sh
- ret <8 x i16> %rv
-}
-
-define <2 x i64> @special_const() {
- ret <2 x i64> <i64 4294967295, i64 4294967295>
-}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
deleted file mode 100644
index 973586bf6c..0000000000
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc -O1 --march=cellspu < %s | FileCheck %s
-
-;CHECK: shuffle
-define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
- ; CHECK: cwd {{\$.}}, 0($sp)
- ; CHECK: shufb {{\$., \$4, \$3, \$.}}
- %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
- ret <4 x float> %val
-}
-
-;CHECK: splat
-define <4 x float> @splat(float %param1) {
- ; CHECK: lqa
- ; CHECK: shufb $3
- ; CHECK: bi
- %vec = insertelement <1 x float> undef, float %param1, i32 0
- %val= shufflevector <1 x float> %vec, <1 x float> undef, <4 x i32> <i32 0,i32 0,i32 0,i32 0>
- ret <4 x float> %val
-}
-
-;CHECK: test_insert
-define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
- %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
-;CHECK: lqa $6,
-;CHECK: shufb $4, $4, $5, $6
- %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1
-
-;CHECK: cdd $5, 0($3)
-;CHECK: lqd $6, 0($3)
-;CHECK: shufb $4, $4, $6, $5
-;CHECK: stqd $4, 0($3)
-;CHECK: bi $lr
- store <2 x float> %sl2_17, <2 x float>* %ptr
- ret void
-}
-
-;CHECK: test_insert_1
-define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
-;CHECK: cwd $5, 4($sp)
-;CHECK: shufb $3, $4, $3, $5
-;CHECK: bi $lr
- %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1
- ret <4 x float> %rv
-}
-
-;CHECK: test_v2i32
-define <2 x i32> @test_v2i32(<4 x i32>%vec)
-{
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
- %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, <2 x i32><i32 1,i32 2>
- ret <2 x i32> %rv
-}
-
-define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
-{
- %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
- <4 x i32> <i32 2,i32 3,i32 0, i32 1>
- ret <4 x i32> %rv
-}
-
-;CHECK: test_v4i32_rot4
-define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
-{
- %rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
- <4 x i32> <i32 1,i32 2,i32 3, i32 0>
- ret <4 x i32> %rv
-}
-
diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll
deleted file mode 100644
index 80bf47ccf5..0000000000
--- a/test/CodeGen/CellSPU/sp_farith.ll
+++ /dev/null
@@ -1,90 +0,0 @@
-; RUN: llc < %s -march=cellspu -enable-unsafe-fp-math > %t1.s
-; RUN: grep fa %t1.s | count 2
-; RUN: grep fs %t1.s | count 2
-; RUN: grep fm %t1.s | count 6
-; RUN: grep fma %t1.s | count 2
-; RUN: grep fms %t1.s | count 2
-; RUN: grep fnms %t1.s | count 3
-;
-; This file includes standard floating point arithmetic instructions
-; NOTE fdiv is tested separately since it is a compound operation
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define float @fp_add(float %arg1, float %arg2) {
- %A = fadd float %arg1, %arg2 ; <float> [#uses=1]
- ret float %A
-}
-
-define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fadd <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- ret <4 x float> %A
-}
-
-define float @fp_sub(float %arg1, float %arg2) {
- %A = fsub float %arg1, %arg2 ; <float> [#uses=1]
- ret float %A
-}
-
-define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fsub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- ret <4 x float> %A
-}
-
-define float @fp_mul(float %arg1, float %arg2) {
- %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
- ret float %A
-}
-
-define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) {
- %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- ret <4 x float> %A
-}
-
-define float @fp_mul_add(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
- %B = fadd float %A, %arg3 ; <float> [#uses=1]
- ret float %B
-}
-
-define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- %B = fadd <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %B
-}
-
-define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2 ; <float> [#uses=1]
- %B = fsub float %A, %arg3 ; <float> [#uses=1]
- ret float %B
-}
-
-define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = fmul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1]
- %B = fsub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %B
-}
-
-; Test the straightforward way of getting fnms
-; c - a * b
-define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2
- %B = fsub float %arg3, %A
- ret float %B
-}
-
-; Test another way of getting fnms
-; - ( a *b -c ) = c - a * b
-define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) {
- %A = fmul float %arg1, %arg2
- %B = fsub float %A, %arg3
- %C = fsub float -0.0, %B
- ret float %C
-}
-
-define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
- %A = fmul <4 x float> %arg1, %arg2
- %B = fsub <4 x float> %A, %arg3
- %D = fsub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
- ret <4 x float> %D
-}
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
deleted file mode 100644
index 43f8776a3d..0000000000
--- a/test/CodeGen/CellSPU/stores.ll
+++ /dev/null
@@ -1,181 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep 'stqd.*0($3)' %t1.s | count 4
-; RUN: grep 'stqd.*16($3)' %t1.s | count 4
-; RUN: grep 16256 %t1.s | count 2
-; RUN: grep 16384 %t1.s | count 1
-; RUN: grep 771 %t1.s | count 4
-; RUN: grep 515 %t1.s | count 2
-; RUN: grep 1799 %t1.s | count 2
-; RUN: grep 1543 %t1.s | count 5
-; RUN: grep 1029 %t1.s | count 3
-; RUN: grep 'shli.*, 4' %t1.s | count 4
-; RUN: grep stqx %t1.s | count 4
-; RUN: grep ilhu %t1.s | count 11
-; RUN: grep iohl %t1.s | count 8
-; RUN: grep shufb %t1.s | count 15
-; RUN: grep frds %t1.s | count 1
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-; ModuleID = 'stores.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-define void @store_v16i8_1(<16 x i8>* %a) nounwind {
-entry:
- store <16 x i8> < i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1, i8 1, i8 2, i8 1, i8 1 >, <16 x i8>* %a
- ret void
-}
-
-define void @store_v16i8_2(<16 x i8>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <16 x i8>* %a, i32 1
- store <16 x i8> < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >, <16 x i8>* %arrayidx
- ret void
-}
-
-define void @store_v16i8_3(<16 x i8>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <16 x i8>* %a, i32 %i
- store <16 x i8> < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >, <16 x i8>* %arrayidx
- ret void
-}
-
-define void @store_v8i16_1(<8 x i16>* %a) nounwind {
-entry:
- store <8 x i16> < i16 1, i16 2, i16 1, i16 1, i16 1, i16 2, i16 1, i16 1 >, <8 x i16>* %a
- ret void
-}
-
-define void @store_v8i16_2(<8 x i16>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <8 x i16>* %a, i16 1
- store <8 x i16> < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >, <8 x i16>* %arrayidx
- ret void
-}
-
-define void @store_v8i16_3(<8 x i16>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <8 x i16>* %a, i32 %i
- store <8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16>* %arrayidx
- ret void
-}
-
-define void @store_v4i32_1(<4 x i32>* %a) nounwind {
-entry:
- store <4 x i32> < i32 1, i32 2, i32 1, i32 1 >, <4 x i32>* %a
- ret void
-}
-
-define void @store_v4i32_2(<4 x i32>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <4 x i32>* %a, i32 1
- store <4 x i32> < i32 2, i32 2, i32 2, i32 2 >, <4 x i32>* %arrayidx
- ret void
-}
-
-define void @store_v4i32_3(<4 x i32>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x i32>* %a, i32 %i
- store <4 x i32> < i32 1, i32 1, i32 1, i32 1 >, <4 x i32>* %arrayidx
- ret void
-}
-
-define void @store_v4f32_1(<4 x float>* %a) nounwind {
-entry:
- store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a
- ret void
-}
-
-define void @store_v4f32_2(<4 x float>* %a) nounwind {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 1
- store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx
- ret void
-}
-
-define void @store_v4f32_3(<4 x float>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 %i
- store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx
- ret void
-}
-
-; Test truncating stores:
-
-define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {
-entry:
- %conv = trunc i16 %val to i8
- store i8 %conv, i8* %dest
- ret i8 %conv
-}
-
-define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {
-entry:
- %conv = trunc i32 %val to i8
- store i8 %conv, i8* %dest
- ret i8 %conv
-}
-
-define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {
-entry:
- %conv = trunc i32 %val to i16
- store i16 %conv, i16* %dest
- ret i16 %conv
-}
-
-define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {
-entry:
- %conv = trunc i64 %val to i8
- store i8 %conv, i8* %dest
- ret i8 %conv
-}
-
-define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {
-entry:
- %conv = trunc i64 %val to i16
- store i16 %conv, i16* %dest
- ret i16 %conv
-}
-
-define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {
-entry:
- %conv = trunc i64 %val to i32
- store i32 %conv, i32* %dest
- ret i32 %conv
-}
-
-define float @tstore_f64_f32(double %val, float* %dest) nounwind {
-entry:
- %conv = fptrunc double %val to float
- store float %conv, float* %dest
- ret float %conv
-}
-
-;Check stores that might span two 16 byte memory blocks
-define void @store_misaligned( i32 %val, i32* %ptr) {
-;CHECK: store_misaligned
-;CHECK: lqd
-;CHECK: lqd
-;CHECK: stqd
-;CHECK: stqd
-;CHECK: bi $lr
- store i32 %val, i32*%ptr, align 2
- ret void
-}
-
-define void @store_v8( <8 x float> %val, <8 x float>* %ptr )
-{
-;CHECK: stq
-;CHECK: stq
-;CHECK: bi $lr
- store <8 x float> %val, <8 x float>* %ptr
- ret void
-}
-
-define void @store_null_vec( <4 x i32> %val ) {
-; FIXME - this is for some reason compiled into a il+stqd, not a sta.
-;CHECK: stqd
-;CHECK: bi $lr
- store <4 x i32> %val, <4 x i32>* null
- ret void
-}
diff --git a/test/CodeGen/CellSPU/storestruct.ll b/test/CodeGen/CellSPU/storestruct.ll
deleted file mode 100644
index 47185e8296..0000000000
--- a/test/CodeGen/CellSPU/storestruct.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-%0 = type {i32, i32}
-@buffer = global [ 72 x %0 ] zeroinitializer
-
-define void@test( ) {
-; Check that there is no illegal "a rt, ra, imm" instruction
-; CHECK-NOT: a {{\$., \$., 5..}}
-; CHECK: a {{\$., \$., \$.}}
- store %0 {i32 1, i32 2} ,
- %0* getelementptr ([72 x %0]* @buffer, i32 0, i32 71)
- ret void
-}
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
deleted file mode 100644
index 8c3275080c..0000000000
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ /dev/null
@@ -1,147 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
-; RUN: grep lqa %t1.s | count 5
-; RUN: grep lqd %t1.s | count 11
-; RUN: grep rotqbyi %t1.s | count 7
-; RUN: grep xshw %t1.s | count 1
-; RUN: grep andi %t1.s | count 5
-; RUN: grep cbd %t1.s | count 3
-; RUN: grep chd %t1.s | count 1
-; RUN: grep cwd %t1.s | count 3
-; RUN: grep shufb %t1.s | count 7
-; RUN: grep stqd %t1.s | count 7
-; RUN: grep iohl %t2.s | count 16
-; RUN: grep ilhu %t2.s | count 16
-; RUN: grep lqd %t2.s | count 16
-; RUN: grep rotqbyi %t2.s | count 7
-; RUN: grep xshw %t2.s | count 1
-; RUN: grep andi %t2.s | count 5
-; RUN: grep cbd %t2.s | count 3
-; RUN: grep chd %t2.s | count 1
-; RUN: grep cwd %t2.s | count 3
-; RUN: grep shufb %t2.s | count 7
-; RUN: grep stqd %t2.s | count 7
-
-; CellSPU legalization is over-sensitive to Legalize's traversal order.
-; XFAIL: *
-
-; ModuleID = 'struct_1.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
-target triple = "spu"
-
-; struct hackstate {
-; unsigned char c1; // offset 0 (rotate left by 13 bytes to byte 3)
-; unsigned char c2; // offset 1 (rotate left by 14 bytes to byte 3)
-; unsigned char c3; // offset 2 (rotate left by 15 bytes to byte 3)
-; int i1; // offset 4 (rotate left by 4 bytes to byte 0)
-; short s1; // offset 8 (rotate left by 6 bytes to byte 2)
-; int i2; // offset 12 [ignored]
-; unsigned char c4; // offset 16 [ignored]
-; unsigned char c5; // offset 17 [ignored]
-; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3)
-; unsigned char c7; // offset 19 (no rotate, in preferred slot)
-; int i3; // offset 20 [ignored]
-; int i4; // offset 24 [ignored]
-; int i5; // offset 28 [ignored]
-; int i6; // offset 32 (no rotate, in preferred slot)
-; }
-%struct.hackstate = type { i8, i8, i8, i32, i16, i32, i8, i8, i8, i8, i32, i32, i32, i32 }
-
-; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
-@state = global %struct.hackstate zeroinitializer, align 16
-
-define zeroext i8 @get_hackstate_c1() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
- ret i8 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c2() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
- ret i8 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c3() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
- ret i8 %tmp2
-}
-
-define i32 @get_hackstate_i1() nounwind {
-entry:
- %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
- ret i32 %tmp2
-}
-
-define signext i16 @get_hackstate_s1() nounwind {
-entry:
- %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
- ret i16 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c6() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16
- ret i8 %tmp2
-}
-
-define zeroext i8 @get_hackstate_c7() nounwind {
-entry:
- %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
- ret i8 %tmp2
-}
-
-define i32 @get_hackstate_i3() nounwind {
-entry:
- %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16
- ret i32 %tmp2
-}
-
-define i32 @get_hackstate_i6() nounwind {
-entry:
- %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
- ret i32 %tmp2
-}
-
-define void @set_hackstate_c1(i8 zeroext %c) nounwind {
-entry:
- store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
- ret void
-}
-
-define void @set_hackstate_c2(i8 zeroext %c) nounwind {
-entry:
- store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
- ret void
-}
-
-define void @set_hackstate_c3(i8 zeroext %c) nounwind {
-entry:
- store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
- ret void
-}
-
-define void @set_hackstate_i1(i32 %i) nounwind {
-entry:
- store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16
- ret void
-}
-
-define void @set_hackstate_s1(i16 signext %s) nounwind {
-entry:
- store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
- ret void
-}
-
-define void @set_hackstate_i3(i32 %i) nounwind {
-entry:
- store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16
- ret void
-}
-
-define void @set_hackstate_i6(i32 %i) nounwind {
-entry:
- store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16
- ret void
-}
diff --git a/test/CodeGen/CellSPU/sub_ops.ll b/test/CodeGen/CellSPU/sub_ops.ll
deleted file mode 100644
index f0c40d37ce..0000000000
--- a/test/CodeGen/CellSPU/sub_ops.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=cellspu | FileCheck %s
-
-define i32 @subword( i32 %param1, i32 %param2) {
-; Check ordering of registers ret=param1-param2 -> rt=rb-ra
-; CHECK-NOT: sf $3, $3, $4
-; CHECK: sf $3, $4, $3
- %1 = sub i32 %param1, %param2
- ret i32 %1
-}
-
-define i16 @subhword( i16 %param1, i16 %param2) {
-; Check ordering of registers ret=param1-param2 -> rt=rb-ra
-; CHECK-NOT: sfh $3, $3, $4
-; CHECK: sfh $3, $4, $3
- %1 = sub i16 %param1, %param2
- ret i16 %1
-}
-
-define float @subfloat( float %param1, float %param2) {
-; Check ordering of registers ret=param1-param2 -> rt=ra-rb
-; (yes this is reverse of i32 instruction)
-; CHECK-NOT: fs $3, $4, $3
-; CHECK: fs $3, $3, $4
- %1 = fsub float %param1, %param2
- ret float %1
-}
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
deleted file mode 100644
index e4c8fb49a3..0000000000
--- a/test/CodeGen/CellSPU/trunc.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep shufb %t1.s | count 19
-; RUN: grep "ilhu.*1799" %t1.s | count 1
-; RUN: grep "ilhu.*771" %t1.s | count 2
-; RUN: grep "ilhu.*1543" %t1.s | count 1
-; RUN: grep "ilhu.*1029" %t1.s | count 1
-; RUN: grep "ilhu.*515" %t1.s | count 1
-; RUN: grep "ilhu.*3855" %t1.s | count 1
-; RUN: grep "ilhu.*3599" %t1.s | count 1
-; RUN: grep "ilhu.*3085" %t1.s | count 1
-; RUN: grep "iohl.*3855" %t1.s | count 1
-; RUN: grep "iohl.*3599" %t1.s | count 2
-; RUN: grep "iohl.*1543" %t1.s | count 2
-; RUN: grep "iohl.*771" %t1.s | count 2
-; RUN: grep "iohl.*515" %t1.s | count 1
-; RUN: grep "iohl.*1799" %t1.s | count 1
-; RUN: grep lqa %t1.s | count 1
-; RUN: grep cbd %t1.s | count 4
-; RUN: grep chd %t1.s | count 3
-; RUN: grep cwd %t1.s | count 1
-; RUN: grep cdd %t1.s | count 1
-
-; ModuleID = 'trunc.bc'
-target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
-target triple = "spu"
-
-define <16 x i8> @trunc_i128_i8(i128 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i128 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 15
- ret <16 x i8> %tmp1
-}
-
-define <8 x i16> @trunc_i128_i16(i128 %u, <8 x i16> %v) {
-entry:
- %0 = trunc i128 %u to i16
- %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 8
- ret <8 x i16> %tmp1
-}
-
-define <4 x i32> @trunc_i128_i32(i128 %u, <4 x i32> %v) {
-entry:
- %0 = trunc i128 %u to i32
- %tmp1 = insertelement <4 x i32> %v, i32 %0, i32 2
- ret <4 x i32> %tmp1
-}
-
-define <2 x i64> @trunc_i128_i64(i128 %u, <2 x i64> %v) {
-entry:
- %0 = trunc i128 %u to i64
- %tmp1 = insertelement <2 x i64> %v, i64 %0, i32 1
- ret <2 x i64> %tmp1
-}
-
-define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i64 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10
- ret <16 x i8> %tmp1
-}
-
-define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) {
-entry:
- %0 = trunc i64 %u to i16
- %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6
- ret <8 x i16> %tmp1
-}
-
-define i32 @trunc_i64_i32(i64 %u) {
-entry:
- %0 = trunc i64 %u to i32
- ret i32 %0
-}
-
-define <16 x i8> @trunc_i32_i8(i32 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i32 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 7
- ret <16 x i8> %tmp1
-}
-
-define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) {
-entry:
- %0 = trunc i32 %u to i16
- %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3
- ret <8 x i16> %tmp1
-}
-
-define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) {
-entry:
- %0 = trunc i16 %u to i8
- %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5
- ret <16 x i8> %tmp1
-}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/README.txt b/test/CodeGen/CellSPU/useful-harnesses/README.txt
deleted file mode 100644
index d87b3989e4..0000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/README.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-This directory contains code that's not part of the DejaGNU test suite,
-but is generally useful as various test harnesses.
-
-vecoperations.c: Various vector operation sanity checks, e.g., shuffles,
- 8-bit vector add and multiply.
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
deleted file mode 100644
index 12fc30bf65..0000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
+++ /dev/null
@@ -1,69 +0,0 @@
-#include <stdio.h>
-
-typedef unsigned int uint32_t;
-typedef int int32_t;
-
-const char *boolstring(int val) {
- return val ? "true" : "false";
-}
-
-int i32_eq(int32_t a, int32_t b) {
- return (a == b);
-}
-
-int i32_neq(int32_t a, int32_t b) {
- return (a != b);
-}
-
-int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
- return ((a == b) ? c : d);
-}
-
-int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
- return ((a != b) ? c : d);
-}
-
-struct pred_s {
- const char *name;
- int (*predfunc)(int32_t, int32_t);
- int (*selfunc)(int32_t, int32_t, int32_t, int32_t);
-};
-
-struct pred_s preds[] = {
- { "eq", i32_eq, i32_eq_select },
- { "neq", i32_neq, i32_neq_select }
-};
-
-int main(void) {
- int i;
- int32_t a = 1234567890;
- int32_t b = 345678901;
- int32_t c = 1234500000;
- int32_t d = 10001;
- int32_t e = 10000;
-
- printf("a = %12d (0x%08x)\n", a, a);
- printf("b = %12d (0x%08x)\n", b, b);
- printf("c = %12d (0x%08x)\n", c, c);
- printf("d = %12d (0x%08x)\n", d, d);
- printf("e = %12d (0x%08x)\n", e, e);
- printf("----------------------------------------\n");
-
- for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
- printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
- printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
- printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
- printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
- printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
- printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));
-
- printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d));
- printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
- printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d));
- printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));
-
- printf("----------------------------------------\n");
- }
-
- return 0;
-}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
deleted file mode 100644
index b613bd872e..0000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
+++ /dev/null
@@ -1,673 +0,0 @@
-#include <stdio.h>
-#include "i64operations.h"
-
-int64_t tval_a = 1234567890003LL;
-int64_t tval_b = 2345678901235LL;
-int64_t tval_c = 1234567890001LL;
-int64_t tval_d = 10001LL;
-int64_t tval_e = 10000LL;
-uint64_t tval_f = 0xffffff0750135eb9;
-int64_t tval_g = -1;
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int
-i64_eq(int64_t a, int64_t b)
-{
- return (a == b);
-}
-
-int
-i64_neq(int64_t a, int64_t b)
-{
- return (a != b);
-}
-
-int
-i64_gt(int64_t a, int64_t b)
-{
- return (a > b);
-}
-
-int
-i64_le(int64_t a, int64_t b)
-{
- return (a <= b);
-}
-
-int
-i64_ge(int64_t a, int64_t b) {
- return (a >= b);
-}
-
-int
-i64_lt(int64_t a, int64_t b) {
- return (a < b);
-}
-
-int
-i64_uge(uint64_t a, uint64_t b)
-{
- return (a >= b);
-}
-
-int
-i64_ult(uint64_t a, uint64_t b)
-{
- return (a < b);
-}
-
-int
-i64_ugt(uint64_t a, uint64_t b)
-{
- return (a > b);
-}
-
-int
-i64_ule(uint64_t a, uint64_t b)
-{
- return (a <= b);
-}
-
-int64_t
-i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d)
-{
- return ((a == b) ? c : d);
-}
-
-int64_t
-i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d)
-{
- return ((a != b) ? c : d);
-}
-
-int64_t
-i64_gt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a > b) ? c : d);
-}
-
-int64_t
-i64_le_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a <= b) ? c : d);
-}
-
-int64_t
-i64_ge_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a >= b) ? c : d);
-}
-
-int64_t
-i64_lt_select(int64_t a, int64_t b, int64_t c, int64_t d) {
- return ((a < b) ? c : d);
-}
-
-uint64_t
-i64_ugt_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
-{
- return ((a > b) ? c : d);
-}
-
-uint64_t
-i64_ule_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
-{
- return ((a <= b) ? c : d);
-}
-
-uint64_t
-i64_uge_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return ((a >= b) ? c : d);
-}
-
-uint64_t
-i64_ult_select(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return ((a < b) ? c : d);
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-struct harness_int64_pred int64_tests_eq[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
-};
-
-struct harness_int64_pred int64_tests_neq[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
-};
-
-struct harness_int64_pred int64_tests_sgt[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
-};
-
-struct harness_int64_pred int64_tests_sle[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
-};
-
-struct harness_int64_pred int64_tests_sge[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, TRUE_VAL, &tval_c}
-};
-
-struct harness_int64_pred int64_tests_slt[] = {
- {"a %s a", &tval_a, &tval_a, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"a %s b", &tval_a, &tval_b, &tval_c, &tval_d, TRUE_VAL, &tval_c},
- {"a %s c", &tval_a, &tval_c, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"d %s e", &tval_d, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d},
- {"e %s e", &tval_e, &tval_e, &tval_c, &tval_d, FALSE_VAL, &tval_d}
-};
-
-struct int64_pred_s int64_preds[] = {
- {"eq", i64_eq, i64_eq_select,
- int64_tests_eq, ARR_SIZE(int64_tests_eq)},
- {"neq", i64_neq, i64_neq_select,
- int64_tests_neq, ARR_SIZE(int64_tests_neq)},
- {"gt", i64_gt, i64_gt_select,
- int64_tests_sgt, ARR_SIZE(int64_tests_sgt)},
- {"le", i64_le, i64_le_select,
- int64_tests_sle, ARR_SIZE(int64_tests_sle)},
- {"ge", i64_ge, i64_ge_select,
- int64_tests_sge, ARR_SIZE(int64_tests_sge)},
- {"lt", i64_lt, i64_lt_select,
- int64_tests_slt, ARR_SIZE(int64_tests_slt)}
-};
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-struct harness_uint64_pred uint64_tests_ugt[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d },
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c },
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c },
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d }
-};
-
-struct harness_uint64_pred uint64_tests_ule[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}
-};
-
-struct harness_uint64_pred uint64_tests_uge[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c}
-};
-
-struct harness_uint64_pred uint64_tests_ult[] = {
- {"a %s a", (uint64_t *) &tval_a, (uint64_t *) &tval_a, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"a %s b", (uint64_t *) &tval_a, (uint64_t *) &tval_b, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, TRUE_VAL, (uint64_t *) &tval_c},
- {"a %s c", (uint64_t *) &tval_a, (uint64_t *) &tval_c, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"d %s e", (uint64_t *) &tval_d, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d},
- {"e %s e", (uint64_t *) &tval_e, (uint64_t *) &tval_e, (uint64_t *) &tval_c,
- (uint64_t *) &tval_d, FALSE_VAL, (uint64_t *) &tval_d}
-};
-
-struct uint64_pred_s uint64_preds[] = {
- {"ugt", i64_ugt, i64_ugt_select,
- uint64_tests_ugt, ARR_SIZE(uint64_tests_ugt)},
- {"ule", i64_ule, i64_ule_select,
- uint64_tests_ule, ARR_SIZE(uint64_tests_ule)},
- {"uge", i64_uge, i64_uge_select,
- uint64_tests_uge, ARR_SIZE(uint64_tests_uge)},
- {"ult", i64_ult, i64_ult_select,
- uint64_tests_ult, ARR_SIZE(uint64_tests_ult)}
-};
-
-int
-compare_expect_int64(const struct int64_pred_s * pred)
-{
- int j, failed = 0;
-
- for (j = 0; j < pred->n_tests; ++j) {
- int pred_result;
-
- pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
-
- if (pred_result != pred->tests[j].expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- ++failed;
- } else {
- int64_t selresult;
-
- selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
- *pred->tests[j].select_a,
- *pred->tests[j].select_b);
-
- if (selresult != *pred->tests[j].select_expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s select: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19lld (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19lld (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- printf(" true = %19lld (0x%016llx)\n", *pred->tests[j].select_a,
- *pred->tests[j].select_a);
- printf(" false = %19lld (0x%016llx)\n", *pred->tests[j].select_b,
- *pred->tests[j].select_b);
- ++failed;
- }
- }
- }
-
- printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
-
- return failed;
-}
-
-int
-compare_expect_uint64(const struct uint64_pred_s * pred)
-{
- int j, failed = 0;
-
- for (j = 0; j < pred->n_tests; ++j) {
- int pred_result;
-
- pred_result = (*pred->predfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs);
- if (pred_result != pred->tests[j].expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- ++failed;
- } else {
- uint64_t selresult;
-
- selresult = (pred->selfunc) (*pred->tests[j].lhs, *pred->tests[j].rhs,
- *pred->tests[j].select_a,
- *pred->tests[j].select_b);
- if (selresult != *pred->tests[j].select_expected) {
- char str[64];
-
- sprintf(str, pred->tests[j].fmt_string, pred->name);
- printf("%s select: returned value is %d, expecting %d\n", str,
- pred_result, pred->tests[j].expected);
- printf(" lhs = %19llu (0x%016llx)\n", *pred->tests[j].lhs,
- *pred->tests[j].lhs);
- printf(" rhs = %19llu (0x%016llx)\n", *pred->tests[j].rhs,
- *pred->tests[j].rhs);
- printf(" true = %19llu (0x%016llx)\n", *pred->tests[j].select_a,
- *pred->tests[j].select_a);
- printf(" false = %19llu (0x%016llx)\n", *pred->tests[j].select_b,
- *pred->tests[j].select_b);
- ++failed;
- }
- }
- }
-
- printf(" %d tests performed, should be %d.\n", j, pred->n_tests);
-
- return failed;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int
-test_i64_sext_i32(int in, int64_t expected) {
- int64_t result = (int64_t) in;
-
- if (result != expected) {
- char str[64];
- sprintf(str, "i64_sext_i32(%d) returns %lld\n", in, result);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_sext_i16(short in, int64_t expected) {
- int64_t result = (int64_t) in;
-
- if (result != expected) {
- char str[64];
- sprintf(str, "i64_sext_i16(%hd) returns %lld\n", in, result);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_sext_i8(signed char in, int64_t expected) {
- int64_t result = (int64_t) in;
-
- if (result != expected) {
- char str[64];
- sprintf(str, "i64_sext_i8(%d) returns %lld\n", in, result);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_zext_i32(unsigned int in, uint64_t expected) {
- uint64_t result = (uint64_t) in;
-
- if (result != expected) {
- char str[64];
- sprintf(str, "i64_zext_i32(%u) returns %llu\n", in, result);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_zext_i16(unsigned short in, uint64_t expected) {
- uint64_t result = (uint64_t) in;
-
- if (result != expected) {
- char str[64];
- sprintf(str, "i64_zext_i16(%hu) returns %llu\n", in, result);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_zext_i8(unsigned char in, uint64_t expected) {
- uint64_t result = (uint64_t) in;
-
- if (result != expected) {
- char str[64];
- sprintf(str, "i64_zext_i8(%u) returns %llu\n", in, result);
- return 1;
- }
-
- return 0;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int64_t
-i64_shl_const(int64_t a) {
- return a << 10;
-}
-
-int64_t
-i64_shl(int64_t a, int amt) {
- return a << amt;
-}
-
-uint64_t
-u64_shl_const(uint64_t a) {
- return a << 10;
-}
-
-uint64_t
-u64_shl(uint64_t a, int amt) {
- return a << amt;
-}
-
-int64_t
-i64_srl_const(int64_t a) {
- return a >> 10;
-}
-
-int64_t
-i64_srl(int64_t a, int amt) {
- return a >> amt;
-}
-
-uint64_t
-u64_srl_const(uint64_t a) {
- return a >> 10;
-}
-
-uint64_t
-u64_srl(uint64_t a, int amt) {
- return a >> amt;
-}
-
-int64_t
-i64_sra_const(int64_t a) {
- return a >> 10;
-}
-
-int64_t
-i64_sra(int64_t a, int amt) {
- return a >> amt;
-}
-
-uint64_t
-u64_sra_const(uint64_t a) {
- return a >> 10;
-}
-
-uint64_t
-u64_sra(uint64_t a, int amt) {
- return a >> amt;
-}
-
-int
-test_u64_constant_shift(const char *func_name, uint64_t (*func)(uint64_t), uint64_t a, uint64_t expected) {
- uint64_t result = (*func)(a);
-
- if (result != expected) {
- printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_constant_shift(const char *func_name, int64_t (*func)(int64_t), int64_t a, int64_t expected) {
- int64_t result = (*func)(a);
-
- if (result != expected) {
- printf("%s(0x%016llx) returns 0x%016llx, expected 0x%016llx\n", func_name, a, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_u64_variable_shift(const char *func_name, uint64_t (*func)(uint64_t, int), uint64_t a, unsigned int b, uint64_t expected) {
- uint64_t result = (*func)(a, b);
-
- if (result != expected) {
- printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-int
-test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), int64_t a, unsigned int b, int64_t expected) {
- int64_t result = (*func)(a, b);
-
- if (result != expected) {
- printf("%s(0x%016llx, %d) returns 0x%016llx, expected 0x%016llx\n", func_name, a, b, result, expected);
- return 1;
- }
-
- return 0;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int64_t i64_mul(int64_t a, int64_t b) {
- return a * b;
-}
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-
-int
-main(void)
-{
- int i, j, failed = 0;
- const char *something_failed = " %d tests failed.\n";
- const char *all_tests_passed = " All tests passed.\n";
-
- printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
- printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
- printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
- printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
- printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
- printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
- printf("tval_g = %20llu (0x%016llx)\n", tval_g, tval_g);
- printf("----------------------------------------\n");
-
- for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
- printf("%s series:\n", int64_preds[i].name);
- if ((failed = compare_expect_int64(int64_preds + i)) > 0) {
- printf(something_failed, failed);
- } else {
- printf(all_tests_passed);
- }
-
- printf("----------------------------------------\n");
- }
-
- for (i = 0; i < ARR_SIZE(uint64_preds); ++i) {
- printf("%s series:\n", uint64_preds[i].name);
- if ((failed = compare_expect_uint64(uint64_preds + i)) > 0) {
- printf(something_failed, failed);
- } else {
- printf(all_tests_passed);
- }
-
- printf("----------------------------------------\n");
- }
-
- /*----------------------------------------------------------------------*/
-
- puts("signed/zero-extend tests:");
-
- failed = 0;
- failed += test_i64_sext_i32(-1, -1LL);
- failed += test_i64_sext_i32(10, 10LL);
- failed += test_i64_sext_i32(0x7fffffff, 0x7fffffffLL);
- failed += test_i64_sext_i16(-1, -1LL);
- failed += test_i64_sext_i16(10, 10LL);
- failed += test_i64_sext_i16(0x7fff, 0x7fffLL);
- failed += test_i64_sext_i8(-1, -1LL);
- failed += test_i64_sext_i8(10, 10LL);
- failed += test_i64_sext_i8(0x7f, 0x7fLL);
-
- failed += test_i64_zext_i32(0xffffffff, 0x00000000ffffffffLLU);
- failed += test_i64_zext_i32(0x01234567, 0x0000000001234567LLU);
- failed += test_i64_zext_i16(0xffff, 0x000000000000ffffLLU);
- failed += test_i64_zext_i16(0x569a, 0x000000000000569aLLU);
- failed += test_i64_zext_i8(0xff, 0x00000000000000ffLLU);
- failed += test_i64_zext_i8(0xa0, 0x00000000000000a0LLU);
-
- if (failed > 0) {
- printf(" %d tests failed.\n", failed);
- } else {
- printf(" All tests passed.\n");
- }
-
- printf("----------------------------------------\n");
-
- failed = 0;
- puts("signed left/right shift tests:");
- failed += test_i64_constant_shift("i64_shl_const", i64_shl_const, tval_a, 0x00047dc7ec114c00LL);
- failed += test_i64_variable_shift("i64_shl", i64_shl, tval_a, 10, 0x00047dc7ec114c00LL);
- failed += test_i64_constant_shift("i64_srl_const", i64_srl_const, tval_a, 0x0000000047dc7ec1LL);
- failed += test_i64_variable_shift("i64_srl", i64_srl, tval_a, 10, 0x0000000047dc7ec1LL);
- failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_a, 0x0000000047dc7ec1LL);
- failed += test_i64_variable_shift("i64_sra", i64_sra, tval_a, 10, 0x0000000047dc7ec1LL);
-
- if (failed > 0) {
- printf(" %d tests ailed.\n", failed);
- } else {
- printf(" All tests passed.\n");
- }
-
- printf("----------------------------------------\n");
-
- failed = 0;
- puts("unsigned left/right shift tests:");
- failed += test_u64_constant_shift("u64_shl_const", u64_shl_const, tval_f, 0xfffc1d404d7ae400LL);
- failed += test_u64_variable_shift("u64_shl", u64_shl, tval_f, 10, 0xfffc1d404d7ae400LL);
- failed += test_u64_constant_shift("u64_srl_const", u64_srl_const, tval_f, 0x003fffffc1d404d7LL);
- failed += test_u64_variable_shift("u64_srl", u64_srl, tval_f, 10, 0x003fffffc1d404d7LL);
- failed += test_i64_constant_shift("i64_sra_const", i64_sra_const, tval_f, 0xffffffffc1d404d7LL);
- failed += test_i64_variable_shift("i64_sra", i64_sra, tval_f, 10, 0xffffffffc1d404d7LL);
- failed += test_u64_constant_shift("u64_sra_const", u64_sra_const, tval_f, 0x003fffffc1d404d7LL);
- failed += test_u64_variable_shift("u64_sra", u64_sra, tval_f, 10, 0x003fffffc1d404d7LL);
-
- if (failed > 0) {
- printf(" %d tests ailed.\n", failed);
- } else {
- printf(" All tests passed.\n");
- }
-
- printf("----------------------------------------\n");
-
- int64_t result;
-
- result = i64_mul(tval_g, tval_g);
- printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
- result = i64_mul(tval_d, tval_e);
- printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
- /* 0xba7a664f13077c9 */
- result = i64_mul(tval_a, tval_b);
- printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
-
- printf("----------------------------------------\n");
-
- return 0;
-}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h b/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
deleted file mode 100644
index 7a02794cd7..0000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#define TRUE_VAL (!0)
-#define FALSE_VAL 0
-#define ARR_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
-
-typedef unsigned long long int uint64_t;
-typedef long long int int64_t;
-
-/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
-struct harness_int64_pred {
- const char *fmt_string;
- int64_t *lhs;
- int64_t *rhs;
- int64_t *select_a;
- int64_t *select_b;
- int expected;
- int64_t *select_expected;
-};
-
-struct harness_uint64_pred {
- const char *fmt_string;
- uint64_t *lhs;
- uint64_t *rhs;
- uint64_t *select_a;
- uint64_t *select_b;
- int expected;
- uint64_t *select_expected;
-};
-
-struct int64_pred_s {
- const char *name;
- int (*predfunc) (int64_t, int64_t);
- int64_t (*selfunc) (int64_t, int64_t, int64_t, int64_t);
- struct harness_int64_pred *tests;
- int n_tests;
-};
-
-struct uint64_pred_s {
- const char *name;
- int (*predfunc) (uint64_t, uint64_t);
- uint64_t (*selfunc) (uint64_t, uint64_t, uint64_t, uint64_t);
- struct harness_uint64_pred *tests;
- int n_tests;
-};
diff --git a/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg b/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg
deleted file mode 100644
index e6f55eef7a..0000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.suffixes = []
diff --git a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c b/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
deleted file mode 100644
index c4c86e3763..0000000000
--- a/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
+++ /dev/null
@@ -1,179 +0,0 @@
-#include <stdio.h>
-
-typedef unsigned char v16i8 __attribute__((ext_vector_type(16)));
-typedef short v8i16 __attribute__((ext_vector_type(16)));
-typedef int v4i32 __attribute__((ext_vector_type(4)));
-typedef float v4f32 __attribute__((ext_vector_type(4)));
-typedef long long v2i64 __attribute__((ext_vector_type(2)));
-typedef double v2f64 __attribute__((ext_vector_type(2)));
-
-void print_v16i8(const char *str, const v16i8 v) {
- union {
- unsigned char elts[16];
- v16i8 vec;
- } tv;
- tv.vec = v;
- printf("%s = { %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, "
- "%hhu, %hhu, %hhu, %hhu, %hhu, %hhu, %hhu, "
- "%hhu, %hhu }\n",
- str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5],
- tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11],
- tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]);
-}
-
-void print_v16i8_hex(const char *str, const v16i8 v) {
- union {
- unsigned char elts[16];
- v16i8 vec;
- } tv;
- tv.vec = v;
- printf("%s = { 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, "
- "0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, 0x%02hhx, "
- "0x%02hhx, 0x%02hhx }\n",
- str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4], tv.elts[5],
- tv.elts[6], tv.elts[7], tv.elts[8], tv.elts[9], tv.elts[10], tv.elts[11],
- tv.elts[12], tv.elts[13], tv.elts[14], tv.elts[15]);
-}
-
-void print_v8i16_hex(const char *str, v8i16 v) {
- union {
- short elts[8];
- v8i16 vec;
- } tv;
- tv.vec = v;
- printf("%s = { 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, 0x%04hx, "
- "0x%04hx, 0x%04hx, 0x%04hx }\n",
- str, tv.elts[0], tv.elts[1], tv.elts[2], tv.elts[3], tv.elts[4],
- tv.elts[5], tv.elts[6], tv.elts[7]);
-}
-
-void print_v4i32(const char *str, v4i32 v) {
- printf("%s = { %d, %d, %d, %d }\n", str, v.x, v.y, v.z, v.w);
-}
-
-void print_v4f32(const char *str, v4f32 v) {
- printf("%s = { %f, %f, %f, %f }\n", str, v.x, v.y, v.z, v.w);
-}
-
-void print_v2i64(const char *str, v2i64 v) {
- printf("%s = { %lld, %lld }\n", str, v.x, v.y);
-}
-
-void print_v2f64(const char *str, v2f64 v) {
- printf("%s = { %g, %g }\n", str, v.x, v.y);
-}
-
-/*----------------------------------------------------------------------*/
-
-v16i8 v16i8_mpy(v16i8 v1, v16i8 v2) {
- return v1 * v2;
-}
-
-v16i8 v16i8_add(v16i8 v1, v16i8 v2) {
- return v1 + v2;
-}
-
-v4i32 v4i32_shuffle_1(v4i32 a) {
- v4i32 c2 = a.yzwx;
- return c2;
-}
-
-v4i32 v4i32_shuffle_2(v4i32 a) {
- v4i32 c2 = a.zwxy;
- return c2;
-}
-
-v4i32 v4i32_shuffle_3(v4i32 a) {
- v4i32 c2 = a.wxyz;
- return c2;
-}
-
-v4i32 v4i32_shuffle_4(v4i32 a) {
- v4i32 c2 = a.xyzw;
- return c2;
-}
-
-v4i32 v4i32_shuffle_5(v4i32 a) {
- v4i32 c2 = a.xwzy;
- return c2;
-}
-
-v4f32 v4f32_shuffle_1(v4f32 a) {
- v4f32 c2 = a.yzwx;
- return c2;
-}
-
-v4f32 v4f32_shuffle_2(v4f32 a) {
- v4f32 c2 = a.zwxy;
- return c2;
-}
-
-v4f32 v4f32_shuffle_3(v4f32 a) {
- v4f32 c2 = a.wxyz;
- return c2;
-}
-
-v4f32 v4f32_shuffle_4(v4f32 a) {
- v4f32 c2 = a.xyzw;
- return c2;
-}
-
-v4f32 v4f32_shuffle_5(v4f32 a) {
- v4f32 c2 = a.xwzy;
- return c2;
-}
-
-v2i64 v2i64_shuffle(v2i64 a) {
- v2i64 c2 = a.yx;
- return c2;
-}
-
-v2f64 v2f64_shuffle(v2f64 a) {
- v2f64 c2 = a.yx;
- return c2;
-}
-
-int main(void) {
- v16i8 v00 = { 0xf4, 0xad, 0x01, 0xe9, 0x51, 0x78, 0xc1, 0x8a,
- 0x94, 0x7c, 0x49, 0x6c, 0x21, 0x32, 0xb2, 0x04 };
- v16i8 va0 = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
- 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 };
- v16i8 va1 = { 0x11, 0x83, 0x4b, 0x63, 0xff, 0x90, 0x32, 0xe5,
- 0x5a, 0xaa, 0x20, 0x01, 0x0d, 0x15, 0x77, 0x05 };
- v8i16 v01 = { 0x1a87, 0x0a14, 0x5014, 0xfff0,
- 0xe194, 0x0184, 0x801e, 0x5940 };
- v4i32 v1 = { 1, 2, 3, 4 };
- v4f32 v2 = { 1.0, 2.0, 3.0, 4.0 };
- v2i64 v3 = { 691043ll, 910301513ll };
- v2f64 v4 = { 5.8e56, 9.103e-62 };
-
- puts("---- vector tests start ----");
-
- print_v16i8_hex("v00 ", v00);
- print_v16i8_hex("va0 ", va0);
- print_v16i8_hex("va1 ", va1);
- print_v16i8_hex("va0 x va1 ", v16i8_mpy(va0, va1));
- print_v16i8_hex("va0 + va1 ", v16i8_add(va0, va1));
- print_v8i16_hex("v01 ", v01);
-
- print_v4i32("v4i32_shuffle_1(1, 2, 3, 4)", v4i32_shuffle_1(v1));
- print_v4i32("v4i32_shuffle_2(1, 2, 3, 4)", v4i32_shuffle_2(v1));
- print_v4i32("v4i32_shuffle_3(1, 2, 3, 4)", v4i32_shuffle_3(v1));
- print_v4i32("v4i32_shuffle_4(1, 2, 3, 4)", v4i32_shuffle_4(v1));
- print_v4i32("v4i32_shuffle_5(1, 2, 3, 4)", v4i32_shuffle_5(v1));
-
- print_v4f32("v4f32_shuffle_1(1, 2, 3, 4)", v4f32_shuffle_1(v2));
- print_v4f32("v4f32_shuffle_2(1, 2, 3, 4)", v4f32_shuffle_2(v2));
- print_v4f32("v4f32_shuffle_3(1, 2, 3, 4)", v4f32_shuffle_3(v2));
- print_v4f32("v4f32_shuffle_4(1, 2, 3, 4)", v4f32_shuffle_4(v2));
- print_v4f32("v4f32_shuffle_5(1, 2, 3, 4)", v4f32_shuffle_5(v2));
-
- print_v2i64("v3 ", v3);
- print_v2i64("v2i64_shuffle ", v2i64_shuffle(v3));
- print_v2f64("v4 ", v4);
- print_v2f64("v2f64_shuffle ", v2f64_shuffle(v4));
-
- puts("---- vector tests end ----");
-
- return 0;
-}
diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll
deleted file mode 100644
index 09e15ffbc7..0000000000
--- a/test/CodeGen/CellSPU/v2f32.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
-%vec = type <2 x float>
-
-define %vec @test_ret(%vec %param)
-{
-;CHECK: bi $lr
- ret %vec %param
-}
-
-define %vec @test_add(%vec %param)
-{
-;CHECK: fa {{\$.}}, $3, $3
- %1 = fadd %vec %param, %param
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_sub(%vec %param)
-{
-;CHECK: fs {{\$.}}, $3, $3
- %1 = fsub %vec %param, %param
-
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_mul(%vec %param)
-{
-;CHECK: fm {{\$.}}, $3, $3
- %1 = fmul %vec %param, %param
-
-;CHECK: bi $lr
- ret %vec %1
-}
-
-; CHECK: test_splat:
-define %vec @test_splat(float %param ) {
-;CHECK: lqa
-;CHECK: shufb
- %sv = insertelement <1 x float> undef, float %param, i32 0
- %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer
-;CHECK: bi $lr
- ret %vec %rv
-}
-
-define void @test_store(%vec %val, %vec* %ptr){
-; CHECK: test_store:
-;CHECK: stqd
- store %vec zeroinitializer, %vec* null
-
-;CHECK: stqd $3, 0(${{.*}})
-;CHECK: bi $lr
- store %vec %val, %vec* %ptr
- ret void
-}
-
-; CHECK: test_insert:
-define %vec @test_insert(){
-;CHECK: cwd
-;CHECK: shufb $3
- %rv = insertelement %vec undef, float 0.0e+00, i32 undef
-;CHECK: bi $lr
- ret %vec %rv
-}
-
-; CHECK: test_unaligned_store:
-
-define void @test_unaligned_store() {
-;CHECK: cdd
-;CHECK: shufb
-;CHECK: stqd
- %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1]
- %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1]
- %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1]
- store <2 x float> zeroinitializer, <2 x float>* %vptr
- ret void
-}
-
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
deleted file mode 100644
index 9c5b89613d..0000000000
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-;RUN: llc --march=cellspu %s -o - | FileCheck %s
-%vec = type <2 x i32>
-
-define %vec @test_ret(%vec %param)
-{
-;CHECK: bi $lr
- ret %vec %param
-}
-
-define %vec @test_add(%vec %param)
-{
-;CHECK: shufb
-;CHECK: addx
- %1 = add %vec %param, %param
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_sub(%vec %param)
-{
- %1 = sub %vec %param, <i32 1, i32 1>
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define %vec @test_mul(%vec %param)
-{
- %1 = mul %vec %param, %param
-;CHECK: bi $lr
- ret %vec %1
-}
-
-define <2 x i32> @test_splat(i32 %param ) {
-;see svn log for why this is here...
-;CHECK-NOT: or $3, $3, $3
-;CHECK: lqa
-;CHECK: shufb
- %sv = insertelement <1 x i32> undef, i32 %param, i32 0
- %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer
-;CHECK: bi $lr
- ret <2 x i32> %rv
-}
-
-define i32 @test_extract() {
-;CHECK: shufb $3
- %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1]
-;CHECK: bi $lr
- ret i32 %rv
-}
-
-define void @test_store( %vec %val, %vec* %ptr)
-{
- store %vec %val, %vec* %ptr
- ret void
-}
-
-define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
-{
- %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
- ret <2 x i32>* %rv
-}
diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll
deleted file mode 100644
index 24c05c6840..0000000000
--- a/test/CodeGen/CellSPU/vec_const.ll
+++ /dev/null
@@ -1,154 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
-; RUN: grep -w il %t1.s | count 3
-; RUN: grep ilhu %t1.s | count 8
-; RUN: grep -w ilh %t1.s | count 5
-; RUN: grep iohl %t1.s | count 7
-; RUN: grep lqa %t1.s | count 6
-; RUN: grep 24672 %t1.s | count 2
-; RUN: grep 16429 %t1.s | count 1
-; RUN: grep 63572 %t1.s | count 1
-; RUN: grep 4660 %t1.s | count 1
-; RUN: grep 22136 %t1.s | count 1
-; RUN: grep 43981 %t1.s | count 1
-; RUN: grep 61202 %t1.s | count 1
-; RUN: grep 16393 %t1.s | count 1
-; RUN: grep 8699 %t1.s | count 1
-; RUN: grep 21572 %t1.s | count 1
-; RUN: grep 11544 %t1.s | count 1
-; RUN: grep 1311768467750121234 %t1.s | count 1
-; RUN: grep lqd %t2.s | count 6
-
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-; Vector constant load tests:
-
-; IL <reg>, 2
-define <4 x i32> @v4i32_constvec() {
- ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 >
-}
-
-; Spill to constant pool
-define <4 x i32> @v4i32_constpool() {
- ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 >
-}
-
-; Max negative range for IL
-define <4 x i32> @v4i32_constvec_2() {
- ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 >
-}
-
-; ILHU <reg>, 73 (0x49)
-; 4784128 = 0x490000
-define <4 x i32> @v4i32_constvec_3() {
- ret <4 x i32> < i32 4784128, i32 4784128,
- i32 4784128, i32 4784128 >
-}
-
-; ILHU <reg>, 61 (0x3d)
-; IOHL <reg>, 15395 (0x3c23)
-define <4 x i32> @v4i32_constvec_4() {
- ret <4 x i32> < i32 4013091, i32 4013091,
- i32 4013091, i32 4013091 >
-}
-
-; ILHU <reg>, 0x5050 (20560)
-; IOHL <reg>, 0x5050 (20560)
-; Tests for whether we expand the size of the bit pattern properly, because
-; this could be interpreted as an i8 pattern (0x50)
-define <4 x i32> @v4i32_constvec_5() {
- ret <4 x i32> < i32 1347440720, i32 1347440720,
- i32 1347440720, i32 1347440720 >
-}
-
-; ILH
-define <8 x i16> @v8i16_constvec_1() {
- ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767,
- i16 32767, i16 32767, i16 32767, i16 32767 >
-}
-
-; ILH
-define <8 x i16> @v8i16_constvec_2() {
- ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511,
- i16 511, i16 511, i16 511 >
-}
-
-; ILH
-define <8 x i16> @v8i16_constvec_3() {
- ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512,
- i16 -512, i16 -512, i16 -512 >
-}
-
-; ILH <reg>, 24672 (0x6060)
-; Tests whether we expand the size of the bit pattern properly, because
-; this could be interpreted as an i8 pattern (0x60)
-define <8 x i16> @v8i16_constvec_4() {
- ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672,
- i16 24672, i16 24672, i16 24672 >
-}
-
-; ILH <reg>, 24672 (0x6060)
-; Tests whether we expand the size of the bit pattern properly, because
-; this is an i8 pattern but has to be expanded out to i16 to load it
-; properly into the vector register.
-define <16 x i8> @v16i8_constvec_1() {
- ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96,
- i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 >
-}
-
-define <4 x float> @v4f32_constvec_1() {
-entry:
- ret <4 x float> < float 0x4005BF0A80000000,
- float 0x4005BF0A80000000,
- float 0x4005BF0A80000000,
- float 0x4005BF0A80000000 >
-}
-
-define <4 x float> @v4f32_constvec_2() {
-entry:
- ret <4 x float> < float 0.000000e+00,
- float 0.000000e+00,
- float 0.000000e+00,
- float 0.000000e+00 >
-}
-
-
-define <4 x float> @v4f32_constvec_3() {
-entry:
- ret <4 x float> < float 0x4005BF0A80000000,
- float 0x3810000000000000,
- float 0x47EFFFFFE0000000,
- float 0x400921FB60000000 >
-}
-
-; 1311768467750121234 => 0x 12345678 abcdef12
-; HI32_hi: 4660
-; HI32_lo: 22136
-; LO32_hi: 43981
-; LO32_lo: 61202
-define <2 x i64> @i64_constvec_1() {
-entry:
- ret <2 x i64> < i64 1311768467750121234,
- i64 1311768467750121234 >
-}
-
-define <2 x i64> @i64_constvec_2() {
-entry:
- ret <2 x i64> < i64 1, i64 1311768467750121234 >
-}
-
-define <2 x double> @f64_constvec_1() {
-entry:
- ret <2 x double> < double 0x400921fb54442d18,
- double 0xbff6a09e667f3bcd >
-}
-
-; 0x400921fb 54442d18 ->
-; (ILHU 0x4009 [16393]/IOHL 0x21fb [ 8699])
-; (ILHU 0x5444 [21572]/IOHL 0x2d18 [11544])
-define <2 x double> @f64_constvec_2() {
-entry:
- ret <2 x double> < double 0x400921fb54442d18,
- double 0x400921fb54442d18 >
-}
diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll
deleted file mode 100644
index 8dcab1d84c..0000000000
--- a/test/CodeGen/CellSPU/vecinsert.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep cbd %t1.s | count 5
-; RUN: grep chd %t1.s | count 5
-; RUN: grep cwd %t1.s | count 11
-; RUN: grep -w il %t1.s | count 5
-; RUN: grep -w ilh %t1.s | count 6
-; RUN: grep iohl %t1.s | count 1
-; RUN: grep ilhu %t1.s | count 4
-; RUN: grep shufb %t1.s | count 27
-; RUN: grep 17219 %t1.s | count 1
-; RUN: grep 22598 %t1.s | count 1
-; RUN: grep -- -39 %t1.s | count 1
-; RUN: grep 24 %t1.s | count 1
-; RUN: grep 1159 %t1.s | count 1
-; RUN: FileCheck %s < %t1.s
-
-; ModuleID = 'vecinsert.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-; 67 -> 0x43, as 8-bit vector constant load = 0x4343 (17219)0x4343
-define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) {
-entry:
- %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10
- %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7
- %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15
- ret <16 x i8> %tmp1.2
-}
-
-; 22598 -> 0x5846
-define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) {
-entry:
- %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5
- %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7
- %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2
- ret <8 x i16> %tmp1.2
-}
-
-; 1574023 -> 0x180487 (ILHU 24/IOHL 1159)
-define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) {
-entry:
- %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
- %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1
- %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
- ret <4 x i32> %tmp1.2
-}
-
-; Should generate IL for the load
-define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) {
-entry:
- %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2
- %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1
- %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3
- ret <4 x i32> %tmp1.2
-}
-
-define void @variable_v16i8_1(<16 x i8>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <16 x i8>* %a, i32 %i
- %tmp2 = load <16 x i8>* %arrayidx
- %tmp3 = insertelement <16 x i8> %tmp2, i8 1, i32 1
- %tmp8 = insertelement <16 x i8> %tmp3, i8 2, i32 11
- store <16 x i8> %tmp8, <16 x i8>* %arrayidx
- ret void
-}
-
-define void @variable_v8i16_1(<8 x i16>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <8 x i16>* %a, i32 %i
- %tmp2 = load <8 x i16>* %arrayidx
- %tmp3 = insertelement <8 x i16> %tmp2, i16 1, i32 1
- %tmp8 = insertelement <8 x i16> %tmp3, i16 2, i32 6
- store <8 x i16> %tmp8, <8 x i16>* %arrayidx
- ret void
-}
-
-define void @variable_v4i32_1(<4 x i32>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x i32>* %a, i32 %i
- %tmp2 = load <4 x i32>* %arrayidx
- %tmp3 = insertelement <4 x i32> %tmp2, i32 1, i32 1
- %tmp8 = insertelement <4 x i32> %tmp3, i32 2, i32 2
- store <4 x i32> %tmp8, <4 x i32>* %arrayidx
- ret void
-}
-
-define void @variable_v4f32_1(<4 x float>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <4 x float>* %a, i32 %i
- %tmp2 = load <4 x float>* %arrayidx
- %tmp3 = insertelement <4 x float> %tmp2, float 1.000000e+00, i32 1
- %tmp8 = insertelement <4 x float> %tmp3, float 2.000000e+00, i32 2
- store <4 x float> %tmp8, <4 x float>* %arrayidx
- ret void
-}
-
-define void @variable_v2i64_1(<2 x i64>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <2 x i64>* %a, i32 %i
- %tmp2 = load <2 x i64>* %arrayidx
- %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 0
- store <2 x i64> %tmp3, <2 x i64>* %arrayidx
- ret void
-}
-
-define void @variable_v2i64_2(<2 x i64>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <2 x i64>* %a, i32 %i
- %tmp2 = load <2 x i64>* %arrayidx
- %tmp3 = insertelement <2 x i64> %tmp2, i64 615, i32 1
- store <2 x i64> %tmp3, <2 x i64>* %arrayidx
- ret void
-}
-
-define void @variable_v2f64_1(<2 x double>* %a, i32 %i) nounwind {
-entry:
- %arrayidx = getelementptr <2 x double>* %a, i32 %i
- %tmp2 = load <2 x double>* %arrayidx
- %tmp3 = insertelement <2 x double> %tmp2, double 1.000000e+00, i32 1
- store <2 x double> %tmp3, <2 x double>* %arrayidx
- ret void
-}
-
-define <4 x i32> @undef_v4i32( i32 %param ) {
- ;CHECK: cwd
- ;CHECK: lqa
- ;CHECK: shufb
- %val = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %param, i32 undef
- ret <4 x i32> %val
-}
-
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index a0f9a02d4c..84814a1c12 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -152,3 +152,8 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
store %i4 %R, %i4* %P
ret void
}
+
+define <2 x i32*> @vector_gep(<2 x [3 x {i32, i32}]*> %a) {
+ %w = getelementptr <2 x [3 x {i32, i32}]*> %a, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>
+ ret <2 x i32*> %w
+}
diff --git a/test/CodeGen/Hexagon/postinc-load.ll b/test/CodeGen/Hexagon/postinc-load.ll
new file mode 100644
index 0000000000..4b5ea67090
--- /dev/null
+++ b/test/CodeGen/Hexagon/postinc-load.ll
@@ -0,0 +1,29 @@
+; RUN: true || llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that post-increment load instructions are being generated.
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}})
+
+define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
+ %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+ %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
+ %0 = load i32* %arrayidx.phi, align 4
+ %1 = load i16* %arrayidx1.phi, align 2
+ %conv = sext i16 %1 to i32
+ %add = add i32 %0, %sum.03
+ %add2 = add i32 %add, %conv
+ %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %exitcond = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 %add2
+}
+
diff --git a/test/CodeGen/MSP430/byval.ll b/test/CodeGen/MSP430/byval.ll
new file mode 100644
index 0000000000..9dda0a097b
--- /dev/null
+++ b/test/CodeGen/MSP430/byval.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+%struct.Foo = type { i16, i16, i16 }
+@foo = global %struct.Foo { i16 1, i16 2, i16 3 }, align 2
+
+define i16 @callee(%struct.Foo* byval %f) nounwind {
+entry:
+; CHECK: callee:
+; CHECK: mov.w 2(r1), r15
+ %0 = getelementptr inbounds %struct.Foo* %f, i32 0, i32 0
+ %1 = load i16* %0, align 2
+ ret i16 %1
+}
+
+define void @caller() nounwind {
+entry:
+; CHECK: caller:
+; CHECK: mov.w &foo+4, 4(r1)
+; CHECK-NEXT: mov.w &foo+2, 2(r1)
+; CHECK-NEXT: mov.w &foo, 0(r1)
+ %call = call i16 @callee(%struct.Foo* byval @foo)
+ ret void
+}
diff --git a/test/CodeGen/MSP430/vararg.ll b/test/CodeGen/MSP430/vararg.ll
new file mode 100644
index 0000000000..603d3ec6b6
--- /dev/null
+++ b/test/CodeGen/MSP430/vararg.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+define void @va_start(i16 %a, ...) nounwind {
+entry:
+; CHECK: va_start:
+; CHECK: sub.w #2, r1
+ %vl = alloca i8*, align 2
+ %vl1 = bitcast i8** %vl to i8*
+; CHECK-NEXT: mov.w r1, [[REG:r[0-9]+]]
+; CHECK-NEXT: add.w #6, [[REG]]
+; CHECK-NEXT: mov.w [[REG]], 0(r1)
+ call void @llvm.va_start(i8* %vl1)
+ call void @llvm.va_end(i8* %vl1)
+ ret void
+}
+
+define i16 @va_arg(i8* %vl) nounwind {
+entry:
+; CHECK: va_arg:
+ %vl.addr = alloca i8*, align 2
+; CHECK: mov.w r15, 0(r1)
+ store i8* %vl, i8** %vl.addr, align 2
+; CHECK: mov.w r15, [[REG:r[0-9]+]]
+; CHECK-NEXT: add.w #2, [[REG]]
+; CHECK-NEXT: mov.w [[REG]], 0(r1)
+ %0 = va_arg i8** %vl.addr, i16
+; CHECK-NEXT: mov.w 0(r15), r15
+ ret i16 %0
+}
+
+define void @va_copy(i8* %vl) nounwind {
+entry:
+; CHECK: va_copy:
+ %vl.addr = alloca i8*, align 2
+ %vl2 = alloca i8*, align 2
+; CHECK: mov.w r15, 2(r1)
+ store i8* %vl, i8** %vl.addr, align 2
+ %0 = bitcast i8** %vl2 to i8*
+ %1 = bitcast i8** %vl.addr to i8*
+; CHECK-NEXT: mov.w r15, 0(r1)
+ call void @llvm.va_copy(i8* %0, i8* %1)
+ ret void
+}
diff --git a/test/CodeGen/Mips/addressing-mode.ll b/test/CodeGen/Mips/addressing-mode.ll
new file mode 100644
index 0000000000..ea76dde82d
--- /dev/null
+++ b/test/CodeGen/Mips/addressing-mode.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@g0 = common global i32 0, align 4
+@g1 = common global i32 0, align 4
+
+; Check that LSR doesn't choose a solution with a formula "reg + 4*reg".
+;
+; CHECK: $BB0_2:
+; CHECK-NOT: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+
+define i32 @f0(i32 %n, i32 %m, [256 x i32]* nocapture %a, [256 x i32]* nocapture %b) nounwind readonly {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %s.022 = phi i32 [ 0, %entry ], [ %add7, %for.inc9 ]
+ %i.021 = phi i32 [ 0, %entry ], [ %add10, %for.inc9 ]
+ br label %for.body3
+
+for.body3:
+ %s.120 = phi i32 [ %s.022, %for.cond1.preheader ], [ %add7, %for.body3 ]
+ %j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ]
+ %arrayidx4 = getelementptr inbounds [256 x i32]* %a, i32 %i.021, i32 %j.019
+ %0 = load i32* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds [256 x i32]* %b, i32 %i.021, i32 %j.019
+ %1 = load i32* %arrayidx6, align 4
+ %add = add i32 %0, %s.120
+ %add7 = add i32 %add, %1
+ %add8 = add nsw i32 %j.019, %m
+ %cmp2 = icmp slt i32 %add8, 64
+ br i1 %cmp2, label %for.body3, label %for.inc9
+
+for.inc9:
+ %add10 = add nsw i32 %i.021, %n
+ %cmp = icmp slt i32 %add10, 64
+ br i1 %cmp, label %for.cond1.preheader, label %for.end11
+
+for.end11:
+ ret i32 %add7
+}
+
diff --git a/test/CodeGen/Mips/biggot.ll b/test/CodeGen/Mips/biggot.ll
new file mode 100644
index 0000000000..c4ad851c82
--- /dev/null
+++ b/test/CodeGen/Mips/biggot.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=mipsel -mxgot < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -mxgot < %s | \
+; RUN: FileCheck %s -check-prefix=N64
+
+@v0 = external global i32
+
+define void @foo1() nounwind {
+entry:
+; O32: lui $[[R0:[0-9]+]], %got_hi(v0)
+; O32: addu $[[R1:[0-9]+]], $[[R0]], ${{[a-z0-9]+}}
+; O32: lw ${{[0-9]+}}, %got_lo(v0)($[[R1]])
+; O32: lui $[[R2:[0-9]+]], %call_hi(foo0)
+; O32: addu $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
+; O32: lw ${{[0-9]+}}, %call_lo(foo0)($[[R3]])
+
+; N64: lui $[[R0:[0-9]+]], %got_hi(v0)
+; N64: daddu $[[R1:[0-9]+]], $[[R0]], ${{[a-z0-9]+}}
+; N64: ld ${{[0-9]+}}, %got_lo(v0)($[[R1]])
+; N64: lui $[[R2:[0-9]+]], %call_hi(foo0)
+; N64: daddu $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
+; N64: ld ${{[0-9]+}}, %call_lo(foo0)($[[R3]])
+
+ %0 = load i32* @v0, align 4
+ tail call void @foo0(i32 %0) nounwind
+ ret void
+}
+
+declare void @foo0(i32)
+
+; call to external function.
+
+define void @foo2(i32* nocapture %d, i32* nocapture %s, i32 %n) nounwind {
+entry:
+; O32: foo2:
+; O32: lui $[[R2:[0-9]+]], %call_hi(memcpy)
+; O32: addu $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
+; O32: lw ${{[0-9]+}}, %call_lo(memcpy)($[[R3]])
+
+; N64: foo2:
+; N64: lui $[[R2:[0-9]+]], %call_hi(memcpy)
+; N64: daddu $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
+; N64: ld ${{[0-9]+}}, %call_lo(memcpy)($[[R3]])
+
+ %0 = bitcast i32* %d to i8*
+ %1 = bitcast i32* %s to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 %n, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index 2fdb736dc8..897fc9768f 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -35,3 +35,35 @@ entry:
declare void @foo4(double)
+@g2 = external global i32
+@g1 = external global i32
+@g3 = external global i32
+
+; Check that branch delay slot can be filled with an instruction with operand
+; $1.
+;
+; Default: foo5:
+; Default-NOT: nop
+
+define void @foo5(i32 %a) nounwind {
+entry:
+ %0 = load i32* @g2, align 4
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ %1 = load i32* @g1, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, i32* @g1, align 4
+ br label %if.end
+
+if.else:
+ %2 = load i32* @g3, align 4
+ %sub = sub nsw i32 %2, %0
+ store i32 %sub, i32* @g3, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
+
diff --git a/test/CodeGen/NVPTX/global-ordering.ll b/test/CodeGen/NVPTX/global-ordering.ll
new file mode 100644
index 0000000000..43394a79e9
--- /dev/null
+++ b/test/CodeGen/NVPTX/global-ordering.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+; Make sure we emit these globals in def-use order
+
+
+; PTX32: .visible .global .align 1 .u8 a = 2;
+; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a;
+; PTX64: .visible .global .align 1 .u8 a = 2;
+; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a;
+@a2 = addrspace(1) global i8 addrspace(1)* @a
+@a = addrspace(1) global i8 2
+
+
+; PTX32: .visible .global .align 1 .u8 b = 1;
+; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b};
+; PTX64: .visible .global .align 1 .u8 b = 1;
+; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b};
+@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b]
+@b = addrspace(1) global i8 1
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
new file mode 100644
index 0000000000..779f7798d8
--- /dev/null
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+define ptx_kernel void @t1(i1* %a) {
+; PTX32: mov.u16 %rc{{[0-9]+}}, 0;
+; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
+; PTX64: mov.u16 %rc{{[0-9]+}}, 0;
+; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
+ store i1 false, i1* %a
+ ret void
+}
+
+
+define ptx_kernel void @t2(i1* %a, i8* %b) {
+; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
+
+ %t1 = load i1* %a
+ %t2 = select i1 %t1, i8 1, i8 2
+ store i8 %t2, i8* %b
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
index 9d2e390c1c..5bff58f2bb 100644
--- a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
+++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -code-model=small < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll
new file mode 100644
index 0000000000..41533a8f32
--- /dev/null
+++ b/test/CodeGen/PowerPC/2012-10-11-dynalloc.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @test(i64 %n) nounwind {
+entry:
+ %0 = alloca i8, i64 %n, align 1
+ %1 = alloca i8, i64 %n, align 1
+ call void @use(i8* %0, i8* %1) nounwind
+ ret void
+}
+
+declare void @use(i8*, i8*)
+
+; Check we actually have two instances of dynamic stack allocation,
+; identified by the stdux used to update the back-chain link.
+; CHECK: stdux
+; CHECK: stdux
diff --git a/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
new file mode 100644
index 0000000000..35e3fdd26e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2012-11-16-mischedcall.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=powerpc64-bgq-linux -enable-misched < %s | FileCheck %s
+;
+; PR14315: misched should not move the physreg copy of %t below the calls.
+
+@.str89 = external unnamed_addr constant [6 x i8], align 1
+
+declare void @init() nounwind
+
+declare void @clock() nounwind
+
+; CHECK: %entry
+; CHECK: fmr 31, 1
+; CHECK: bl init
+define void @s332(double %t) nounwind {
+entry:
+ tail call void @init()
+ tail call void @clock() nounwind
+ br label %for.cond2
+
+for.cond2: ; preds = %for.body4, %entry
+ %i.0 = phi i32 [ %inc, %for.body4 ], [ 0, %entry ]
+ %cmp3 = icmp slt i32 undef, 16000
+ br i1 %cmp3, label %for.body4, label %L20
+
+for.body4: ; preds = %for.cond2
+ %cmp5 = fcmp ogt double undef, %t
+ %inc = add nsw i32 %i.0, 1
+ br i1 %cmp5, label %L20, label %for.cond2
+
+L20: ; preds = %for.body4, %for.cond2
+ %index.0 = phi i32 [ -2, %for.cond2 ], [ %i.0, %for.body4 ]
+ unreachable
+}
diff --git a/test/CodeGen/PowerPC/available-externally.ll b/test/CodeGen/PowerPC/available-externally.ll
index fdead7dd8b..abed0de80b 100644
--- a/test/CodeGen/PowerPC/available-externally.ll
+++ b/test/CodeGen/PowerPC/available-externally.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -relocation-model=static | FileCheck %s -check-prefix=STATIC
; RUN: llc < %s -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -relocation-model=pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=PIC64
; RUN: llc < %s -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DYNAMIC
+; RUN: llc < %s -relocation-model=dynamic-no-pic -mtriple=powerpc64-apple-darwin8 | FileCheck %s -check-prefix=DYNAMIC64
; PR4482
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "powerpc-apple-darwin8"
@@ -16,10 +18,18 @@ entry:
; PIC: bl L_exact_log2$stub
; PIC: blr
+; PIC64: _foo:
+; PIC64: bl L_exact_log2$stub
+; PIC64: blr
+
; DYNAMIC: _foo:
; DYNAMIC: bl L_exact_log2$stub
; DYNAMIC: blr
+; DYNAMIC64: _foo:
+; DYNAMIC64: bl L_exact_log2$stub
+; DYNAMIC64: blr
+
%A = call i32 @exact_log2(i64 %x) nounwind
ret i32 %A
}
@@ -34,13 +44,13 @@ entry:
; PIC: L_exact_log2$stub:
; PIC: .indirect_symbol _exact_log2
; PIC: mflr r0
-; PIC: bcl 20,31,L_exact_log2$stub$tmp
+; PIC: bcl 20, 31, L_exact_log2$stub$tmp
; PIC: L_exact_log2$stub$tmp:
; PIC: mflr r11
-; PIC: addis r11,r11,ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
; PIC: mtlr r0
-; PIC: lwzu r12,lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC: lwzu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
; PIC: mtctr r12
; PIC: bctr
@@ -51,12 +61,32 @@ entry:
; PIC: .subsections_via_symbols
+; PIC64: .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+; PIC64: L_exact_log2$stub:
+; PIC64: .indirect_symbol _exact_log2
+; PIC64: mflr r0
+; PIC64: bcl 20, 31, L_exact_log2$stub$tmp
+
+; PIC64: L_exact_log2$stub$tmp:
+; PIC64: mflr r11
+; PIC64: addis r11, r11, ha16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)
+; PIC64: mtlr r0
+; PIC64: ldu r12, lo16(L_exact_log2$lazy_ptr-L_exact_log2$stub$tmp)(r11)
+; PIC64: mtctr r12
+; PIC64: bctr
+
+; PIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; PIC64: L_exact_log2$lazy_ptr:
+; PIC64: .indirect_symbol _exact_log2
+; PIC64: .quad dyld_stub_binding_helper
+
+; PIC64: .subsections_via_symbols
; DYNAMIC: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
; DYNAMIC: L_exact_log2$stub:
; DYNAMIC: .indirect_symbol _exact_log2
-; DYNAMIC: lis r11,ha16(L_exact_log2$lazy_ptr)
-; DYNAMIC: lwzu r12,lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC: lis r11, ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC: lwzu r12, lo16(L_exact_log2$lazy_ptr)(r11)
; DYNAMIC: mtctr r12
; DYNAMIC: bctr
@@ -65,7 +95,15 @@ entry:
; DYNAMIC: .indirect_symbol _exact_log2
; DYNAMIC: .long dyld_stub_binding_helper
-
-
-
-
+; DYNAMIC64: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+; DYNAMIC64: L_exact_log2$stub:
+; DYNAMIC64: .indirect_symbol _exact_log2
+; DYNAMIC64: lis r11, ha16(L_exact_log2$lazy_ptr)
+; DYNAMIC64: ldu r12, lo16(L_exact_log2$lazy_ptr)(r11)
+; DYNAMIC64: mtctr r12
+; DYNAMIC64: bctr
+
+; DYNAMIC64: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
+; DYNAMIC64: L_exact_log2$lazy_ptr:
+; DYNAMIC64: .indirect_symbol _exact_log2
+; DYNAMIC64: .quad dyld_stub_binding_helper
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index 5a0c072c9c..5ae1be8953 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -12,11 +12,11 @@ entry:
; Note that only parts of the sequence are checked for here, to allow
; for minor code generation differences.
-; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53
-; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1
-; CHECK: cmpldi 0, [[REGISTER]], 1
-; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1
-; CHECK: std [[REGISTER]], -{{[0-9]+}}(1)
+; CHECK: sradi [[REG1:[0-9]+]], 3, 53
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1
+; CHECK: cmpldi 0, [[REG2]], 1
+; CHECK: isel [[REG3:[0-9]+]], {{[0-9]+}}, 3, 1
+; CHECK: std [[REG3]], -{{[0-9]+}}(1)
; Also check that with -enable-unsafe-fp-math we do not get that extra
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index 39af11a3d5..fcf53da67f 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc32 | \
; RUN: not grep addi
-; RUN: llc < %s -march=ppc64 | \
+; RUN: llc -code-model=small < %s -march=ppc64 | \
; RUN: not grep addi
@Glob = global i64 4
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
index a29bdcb250..7f30ef883e 100644
--- a/test/CodeGen/PowerPC/ppc64-toc.ll
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -code-model=small < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/stubs.ll b/test/CodeGen/PowerPC/stubs.ll
index 4889263b4c..cfcc50b7a8 100644
--- a/test/CodeGen/PowerPC/stubs.ll
+++ b/test/CodeGen/PowerPC/stubs.ll
@@ -10,8 +10,8 @@ entry:
; CHECK: .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
; CHECK: ___floatditf$stub:
; CHECK: .indirect_symbol ___floatditf
-; CHECK: lis r11,ha16(___floatditf$lazy_ptr)
-; CHECK: lwzu r12,lo16(___floatditf$lazy_ptr)(r11)
+; CHECK: lis r11, ha16(___floatditf$lazy_ptr)
+; CHECK: lwzu r12, lo16(___floatditf$lazy_ptr)(r11)
; CHECK: mtctr r12
; CHECK: bctr
; CHECK: .section __DATA,__la_symbol_ptr,lazy_symbol_pointers
diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll
index 201c15b9c7..15a3f9f295 100644
--- a/test/CodeGen/PowerPC/vec_extload.ll
+++ b/test/CodeGen/PowerPC/vec_extload.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+; RUN: llc -mcpu=pwr6 -mattr=+altivec -code-model=small < %s | FileCheck %s
; Check vector extend load expansion with altivec enabled.
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
new file mode 100644
index 0000000000..f41faa0339
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check vector round to single-precision toward -infinity (vrfim)
+; instruction generation using Altivec.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+define <2 x double> @floor_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: floor_v2f64:
+; CHECK: bl floor
+; CHECK: bl floor
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+define <4 x double> @floor_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: floor_v4f64:
+; CHECK: bl floor
+; CHECK: bl floor
+; CHECK: bl floor
+; CHECK: bl floor
+
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: ceil_v2f64:
+; CHECK: bl ceil
+; CHECK: bl ceil
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: ceil_v4f64:
+; CHECK: bl ceil
+; CHECK: bl ceil
+; CHECK: bl ceil
+; CHECK: bl ceil
+
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: trunc_v2f64:
+; CHECK: bl trunc
+; CHECK: bl trunc
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: trunc_v4f64:
+; CHECK: bl trunc
+; CHECK: bl trunc
+; CHECK: bl trunc
+; CHECK: bl trunc
+
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+ %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+; CHECK: nearbyint_v2f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+ %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+; CHECK: nearbyint_v4f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+define <4 x float> @floor_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: floor_v4f32:
+; CHECK: vrfim
+
+declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+define <8 x float> @floor_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: floor_v8f32:
+; CHECK: vrfim
+; CHECK: vrfim
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: ceil_v4f32:
+; CHECK: vrfip
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: ceil_v8f32:
+; CHECK: vrfip
+; CHECK: vrfip
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: trunc_v4f32:
+; CHECK: vrfiz
+
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: trunc_v8f32:
+; CHECK: vrfiz
+; CHECK: vrfiz
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+ %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+; CHECK: nearbyint_v4f32:
+; CHECK: vrfin
+
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+ %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+; CHECK: nearbyint_v8f32:
+; CHECK: vrfin
+; CHECK: vrfin
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
deleted file mode 100644
index a57f7166ca..0000000000
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; Linux doesn't support stack realignment for functions with allocas (PR2888).
-; Until it does, we shouldn't use movaps to access the stack. On targets with
-; sufficiently aligned stack (e.g. darwin) we should.
-; PR8969 - make 32-bit linux have a 16-byte aligned stack
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -mcpu=yonah | grep movaps | count 2
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=yonah | grep movaps | count 2
-
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo(i32 %t) nounwind {
- %tmp1210 = alloca i8, i32 32, align 4
- call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
- %x = alloca i8, i32 %t
- call void @dummy(i8* %x)
- ret void
-}
-
-declare void @dummy(i8*)
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
index 7d1cda35a2..3d058bc289 100644
--- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -10,10 +10,10 @@ entry:
; CHECK: movl ([[REG:%[a-z]+]]), %eax
; CHECK: movl 4([[REG]]), %edx
; CHECK: LBB0_1:
-; CHECK: movl $1
-; CHECK: addl
-; CHECK: movl $0
-; CHECK: adcl
+; CHECK: movl %eax, %ebx
+; CHECK: addl {{%[a-z]+}}, %ebx
+; CHECK: movl %edx, %ecx
+; CHECK: adcl {{%[a-z]+}}, %ecx
; CHECK: lock
; CHECK-NEXT: cmpxchg8b ([[REG]])
; CHECK-NEXT: jne
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 1446b36a0f..0fec9658d6 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN32 %s
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=NOT_WIN %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X64 %s
declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+declare i32 @func_int(i32, i32)
+
; WIN64: testf16_inp
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
@@ -11,19 +14,19 @@ declare <16 x float> @func_float16(<16 x float>, <16 x float>)
; WIN64: call
; WIN64: ret
-; WIN32: testf16_inp
-; WIN32: movl %eax, (%esp)
-; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
-; WIN32: vaddps {{.*}}, {{%ymm[0-1]}}
-; WIN32: call
-; WIN32: ret
+; X32: testf16_inp
+; X32: movl %eax, (%esp)
+; X32: vaddps {{.*}}, {{%ymm[0-1]}}
+; X32: vaddps {{.*}}, {{%ymm[0-1]}}
+; X32: call
+; X32: ret
-; NOT_WIN: testf16_inp
-; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
-; NOT_WIN: vaddps {{.*}}, {{%ymm[0-1]}}
-; NOT_WIN: leaq {{.*}}(%rsp), %rdi
-; NOT_WIN: call
-; NOT_WIN: ret
+; X64: testf16_inp
+; X64: vaddps {{.*}}, {{%ymm[0-1]}}
+; X64: vaddps {{.*}}, {{%ymm[0-1]}}
+; X64: leaq {{.*}}(%rsp), %rdi
+; X64: call
+; X64: ret
;test calling conventions - input parameters
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
@@ -45,11 +48,11 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; WIN64: ret
; preserved ymm8-ymm15
-; NOT_WIN: testf16_regs
-; NOT_WIN: call
-; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
-; NOT_WIN: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
-; NOT_WIN: ret
+; X64: testf16_regs
+; X64: call
+; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
+; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
+; X64: ret
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
@@ -84,24 +87,43 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; NOT_WIN: call
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; NOT_WIN: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: call
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
}
+
+; test functions with integer parameters
+; pass parameters on stack for 32-bit platform
+; X32: movl {{.*}}, 4(%esp)
+; X32: movl {{.*}}, (%esp)
+; X32: call
+; X32: addl {{.*}}, %eax
+
+; pass parameters in registers for 64-bit platform
+; X64: leal {{.*}}, %edi
+; X64: movl {{.*}}, %esi
+; X64: call
+; X64: addl {{.*}}, %eax
+define i32 @test_int(i32 %a, i32 %b) nounwind {
+ %c1 = add i32 %a, %b
+ %c2 = call intel_ocl_bicc i32 @func_int(i32 %c1, i32 %a)
+ %c = add i32 %c2, %b
+ ret i32 %c
+}
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index ec11654b35..904f048d1e 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
ret <8 x float>%S
}
+; rdar://12684358
+; Make sure loads happen before stores.
+; CHECK: swap8doubles
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
+entry:
+ %add.ptr = getelementptr inbounds double* %A, i64 2
+ %v.i = bitcast double* %A to <2 x double>*
+ %0 = load <2 x double>* %v.i, align 1
+ %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i = bitcast double* %add.ptr to <2 x double>*
+ %1 = load <2 x double>* %v1.i, align 1
+ %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
+ %add.ptr1 = getelementptr inbounds double* %A, i64 6
+ %add.ptr2 = getelementptr inbounds double* %A, i64 4
+ %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
+ %3 = load <2 x double>* %v.i27, align 1
+ %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
+ %4 = load <2 x double>* %v1.i29, align 1
+ %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
+ %6 = bitcast double* %C to <4 x double>*
+ %7 = load <4 x double>* %6, align 32
+ %add.ptr5 = getelementptr inbounds double* %C, i64 4
+ %8 = bitcast double* %add.ptr5 to <4 x double>*
+ %9 = load <4 x double>* %8, align 32
+ %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
+ %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
+ store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
+ store <2 x double> %10, <2 x double>* %v1.i, align 16
+ store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
+ store <2 x double> %11, <2 x double>* %v1.i29, align 16
+ store <4 x double> %2, <4 x double>* %6, align 32
+ store <4 x double> %5, <4 x double>* %8, align 32
+ ret void
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 94bcddd975..5ad75236e1 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -47,9 +47,9 @@ entry:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
; shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovaps
+; CHECK: vmovdqa
+; CHECK-NEXT: vpshufd $-1
; CHECK-NEXT: vinsertf128 $1
-; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
allocas:
%udx495 = alloca [18 x [18 x float]], align 32
@@ -75,8 +75,8 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
ret <8 x float> %load_broadcast12281250
}
-; CHECK: vinsertf128 $1
-; CHECK-NEXT: vpermilps $0
+; CHECK: vpshufd $0
+; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcF(i32 %val) nounwind {
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
@@ -84,8 +84,8 @@ define <8 x float> @funcF(i32 %val) nounwind {
ret <8 x float> %tmp
}
-; CHECK: vinsertf128 $1
-; CHECK-NEXT: vpermilps $0
+; CHECK: vpermilps $0
+; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -93,8 +93,8 @@ entry:
}
; CHECK: vextractf128 $1
-; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $85
+; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index 196efe58e6..c5187db6de 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 has not supported byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index f3b125c6e3..d06fd8898e 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 has not supported byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index b7a4aa3f9b..4711e45111 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 has not supported byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index dca0936022..f24a5f9aa3 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s -check-prefix=X64
; X64-NOT: movsq
; X64: rep
; X64-NOT: rep
@@ -12,7 +12,7 @@
; Win64 has not supported byval yet.
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -march=x86 -mattr=-avx | FileCheck %s -check-prefix=X32
; X32-NOT: movsl
; X32: rep
; X32-NOT: rep
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index c5e47facf3..9405f76cbe 100644
--- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -103,7 +103,7 @@ entry:
declare void @t4_helper(i32*, i32*, <8 x float>*)
-; Dynamic realignment + Spill
+; Spilling an AVX register shouldn't cause dynamic realignment
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
%a = alloca i32, align 4
@@ -116,21 +116,15 @@ entry:
ret i32 %add
; CHECK: _t5
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
-; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: vmovups [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
-; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: vmovups (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
}
declare void @t5_helper1(i32*)
diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll
new file mode 100644
index 0000000000..76d17a09d5
--- /dev/null
+++ b/test/CodeGen/X86/hipe-cc.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu -mcpu=pentium | FileCheck %s
+
+; Check the HiPE calling convention works (x86-32)
+
+define void @zap(i32 %a, i32 %b) nounwind {
+entry:
+ ; CHECK: movl 40(%esp), %eax
+ ; CHECK-NEXT: movl 44(%esp), %edx
+ ; CHECK-NEXT: movl $8, %ecx
+ ; CHECK-NEXT: calll addfour
+ %0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8)
+ %res = extractvalue {i32, i32, i32} %0, 2
+
+ ; CHECK: movl %eax, 16(%esp)
+ ; CHECK-NEXT: movl $2, 12(%esp)
+ ; CHECK-NEXT: movl $1, 8(%esp)
+ ; CHECK: calll foo
+ tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind
+ ret void
+}
+
+define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ ; CHECK: addl %edx, %eax
+ ; CHECK-NEXT: addl %ecx, %eax
+ %0 = add i32 %x, %y
+ %1 = add i32 %0, %z
+
+ ; CHECK: ret
+ %res = insertvalue {i32, i32, i32} undef, i32 %1, 2
+ ret {i32, i32, i32} %res
+}
+
+define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind {
+entry:
+ ; CHECK: movl %esi, 16(%esp)
+ ; CHECK-NEXT: movl %ebp, 12(%esp)
+ ; CHECK-NEXT: movl %eax, 8(%esp)
+ ; CHECK-NEXT: movl %edx, 4(%esp)
+ ; CHECK-NEXT: movl %ecx, (%esp)
+ %hp_var = alloca i32
+ %p_var = alloca i32
+ %arg0_var = alloca i32
+ %arg1_var = alloca i32
+ %arg2_var = alloca i32
+ store i32 %hp, i32* %hp_var
+ store i32 %p, i32* %p_var
+ store i32 %arg0, i32* %arg0_var
+ store i32 %arg1, i32* %arg1_var
+ store i32 %arg2, i32* %arg2_var
+
+ ; CHECK: movl 4(%esp), %edx
+ ; CHECK-NEXT: movl 8(%esp), %eax
+ ; CHECK-NEXT: movl 12(%esp), %ebp
+ ; CHECK-NEXT: movl 16(%esp), %esi
+ %0 = load i32* %hp_var
+ %1 = load i32* %p_var
+ %2 = load i32* %arg0_var
+ %3 = load i32* %arg1_var
+ %4 = load i32* %arg2_var
+ ; CHECK: jmp bar
+ tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind
+ ret void
+}
+
+define cc 11 void @baz() nounwind {
+ %tmp_clos = load i32* @clos
+ %tmp_clos2 = inttoptr i32 %tmp_clos to i32*
+ %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)*
+ ; CHECK: movl $42, %eax
+ ; CHECK-NEXT: jmpl *clos
+ tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind
+ ret void
+}
+
+@clos = external constant i32
+declare cc 11 void @bar(i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll
new file mode 100644
index 0000000000..5dbb5a25cb
--- /dev/null
+++ b/test/CodeGen/X86/hipe-cc64.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu -mcpu=opteron | FileCheck %s
+
+; Check the HiPE calling convention works (x86-64)
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+ ; CHECK: movq %rsi, %rax
+ ; CHECK-NEXT: movq %rdi, %rsi
+ ; CHECK-NEXT: movq %rax, %rdx
+ ; CHECK-NEXT: movl $8, %ecx
+ ; CHECK-NEXT: movl $9, %r8d
+ ; CHECK-NEXT: callq addfour
+ %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
+ %res = extractvalue {i64, i64, i64} %0, 2
+
+ ; CHECK: movl $1, %edx
+ ; CHECK-NEXT: movl $2, %ecx
+ ; CHECK-NEXT: movl $3, %r8d
+ ; CHECK-NEXT: movq %rax, %r9
+ ; CHECK: callq foo
+ tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind
+ ret void
+}
+
+define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
+entry:
+ ; CHECK: leaq (%rsi,%rdx), %rax
+ ; CHECK-NEXT: addq %rcx, %rax
+ ; CHECK-NEXT: addq %r8, %rax
+ %0 = add i64 %x, %y
+ %1 = add i64 %0, %z
+ %2 = add i64 %1, %w
+
+ ; CHECK: ret
+ %res = insertvalue {i64, i64, i64} undef, i64 %2, 2
+ ret {i64, i64, i64} %res
+}
+
+define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
+entry:
+ ; CHECK: movq %r15, 40(%rsp)
+ ; CHECK-NEXT: movq %rbp, 32(%rsp)
+ ; CHECK-NEXT: movq %rsi, 24(%rsp)
+ ; CHECK-NEXT: movq %rdx, 16(%rsp)
+ ; CHECK-NEXT: movq %rcx, 8(%rsp)
+ ; CHECK-NEXT: movq %r8, (%rsp)
+ %hp_var = alloca i64
+ %p_var = alloca i64
+ %arg0_var = alloca i64
+ %arg1_var = alloca i64
+ %arg2_var = alloca i64
+ %arg3_var = alloca i64
+ store i64 %hp, i64* %hp_var
+ store i64 %p, i64* %p_var
+ store i64 %arg0, i64* %arg0_var
+ store i64 %arg1, i64* %arg1_var
+ store i64 %arg2, i64* %arg2_var
+ store i64 %arg3, i64* %arg3_var
+
+ ; CHECK: movq 8(%rsp), %rcx
+ ; CHECK-NEXT: movq 16(%rsp), %rdx
+ ; CHECK-NEXT: movq 24(%rsp), %rsi
+ ; CHECK-NEXT: movq 32(%rsp), %rbp
+ ; CHECK-NEXT: movq 40(%rsp), %r15
+ %0 = load i64* %hp_var
+ %1 = load i64* %p_var
+ %2 = load i64* %arg0_var
+ %3 = load i64* %arg1_var
+ %4 = load i64* %arg2_var
+ %5 = load i64* %arg3_var
+ ; CHECK: jmp bar
+ tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
+ ret void
+}
+
+define cc 11 void @baz() nounwind {
+ %tmp_clos = load i64* @clos
+ %tmp_clos2 = inttoptr i64 %tmp_clos to i64*
+ %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
+ ; CHECK: movl $42, %esi
+ ; CHECK-NEXT: jmpq *(%rax)
+ tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind
+ ret void
+}
+
+@clos = external constant i64
+declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index e6eb9efd8c..d201ebdc85 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -52,3 +52,10 @@ entry:
%0 = call { i32, i32, i32, i32, i32 } asm sideeffect "", "=&r,=&r,=&r,=&r,=&q,r,~{ecx},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %h) nounwind
ret void
}
+
+; Mix normal and EC defs of the same register.
+define i32 @pr14376() nounwind noinline {
+entry:
+ %asm = tail call i32 asm sideeffect "", "={ax},i,~{eax},~{flags},~{rax}"(i64 61) nounwind
+ ret i32 %asm
+}
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index eae2e70834..7a2bbc4ef0 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mattr=+sse2 -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2
; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1
; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
diff --git a/test/CodeGen/X86/memset-sse-stack-realignment.ll b/test/CodeGen/X86/memset-sse-stack-realignment.ll
new file mode 100644
index 0000000000..df9de5dfaf
--- /dev/null
+++ b/test/CodeGen/X86/memset-sse-stack-realignment.ll
@@ -0,0 +1,77 @@
+; Make sure that we realign the stack. Mingw32 uses 4 byte stack alignment, we
+; need 16 bytes for SSE and 32 bytes for AVX.
+
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s -check-prefix=NOSSE
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s -check-prefix=SSE1
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX1
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
+
+define void @test1(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 32, align 4
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+
+; NOSSE: test1:
+; NOSSE-NOT: and
+; NOSSE: movl $0
+
+; SSE1: test1:
+; SSE1: andl $-16
+; SSE1: movl %esp, %esi
+; SSE1: movaps
+
+; SSE2: test1:
+; SSE2: andl $-16
+; SSE2: movl %esp, %esi
+; SSE2: movaps
+
+; AVX1: test1:
+; AVX1: andl $-32
+; AVX1: movl %esp, %esi
+; AVX1: vmovaps %ymm
+
+; AVX2: test1:
+; AVX2: andl $-32
+; AVX2: movl %esp, %esi
+; AVX2: vmovaps %ymm
+
+}
+
+define void @test2(i32 %t) nounwind {
+ %tmp1210 = alloca i8, i32 16, align 4
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 16, i32 4, i1 false)
+ %x = alloca i8, i32 %t
+ call void @dummy(i8* %x)
+ ret void
+
+; NOSSE: test2:
+; NOSSE-NOT: and
+; NOSSE: movl $0
+
+; SSE1: test2:
+; SSE1: andl $-16
+; SSE1: movl %esp, %esi
+; SSE1: movaps
+
+; SSE2: test2:
+; SSE2: andl $-16
+; SSE2: movl %esp, %esi
+; SSE2: movaps
+
+; AVX1: test2:
+; AVX1: andl $-16
+; AVX1: movl %esp, %esi
+; AVX1: vmovaps %xmm
+
+; AVX2: test2:
+; AVX2: andl $-16
+; AVX2: movl %esp, %esi
+; AVX2: vmovaps %xmm
+}
+
+declare void @dummy(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index 72b3e0fa3d..b35f2615d0 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 9
-; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 3
+; RUN: llc < %s -march=x86 -mcpu=pentium2 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -march=x86 -mcpu=pentium3 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=XMM
+; RUN: llc < %s -march=x86 -mcpu=bdver1 -mtriple=i686-apple-darwin8.8.0 | FileCheck %s --check-prefix=YMM
%struct.x = type { i16, i16 }
@@ -8,7 +9,27 @@ entry:
%up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2]
%up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1]
%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; <i8*> [#uses=1]
+
call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false)
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86: movl $0,
+; X86-NOT: movl $0,
+
+; XMM: xorps %xmm{{[0-9]+}}, [[Z:%xmm[0-9]+]]
+; XMM: movaps [[Z]],
+; XMM: movaps [[Z]],
+; XMM-NOT: movaps
+
+; YMM: vxorps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, [[Z:%ymm[0-9]+]]
+; YMM: vmovaps [[Z]],
+; YMM-NOT: movaps
+
call void @foo( %struct.x* %up_mvd116 ) nounwind
ret void
}
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index e20fce172f..8cfa032797 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20
+; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core2 | grep movl | count 20
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10
define void @bork() nounwind {
diff --git a/test/CodeGen/X86/pr14314.ll b/test/CodeGen/X86/pr14314.ll
new file mode 100644
index 0000000000..0832702244
--- /dev/null
+++ b/test/CodeGen/X86/pr14314.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 | FileCheck %s
+
+define i64 @atomicSub(i64* %a, i64 %b) nounwind {
+entry:
+ %0 = atomicrmw sub i64* %a, i64 %b seq_cst
+ ret i64 %0
+; CHECK: atomicSub
+; CHECK: movl %eax, %ebx
+; CHECK: subl {{%[a-z]+}}, %ebx
+; CHECK: movl %edx, %ecx
+; CHECK: sbbl {{%[a-z]+}}, %ecx
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/pr14333.ll b/test/CodeGen/X86/pr14333.ll
new file mode 100644
index 0000000000..86c12ef6b5
--- /dev/null
+++ b/test/CodeGen/X86/pr14333.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s
+%foo = type { i64, i64 }
+define void @bar(%foo* %zed) {
+ %tmp = getelementptr inbounds %foo* %zed, i64 0, i32 0
+ store i64 0, i64* %tmp, align 8
+ %tmp2 = getelementptr inbounds %foo* %zed, i64 0, i32 1
+ store i64 0, i64* %tmp2, align 8
+ %tmp3 = bitcast %foo* %zed to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp3, i8 0, i64 16, i32 8, i1 false)
+ ret void
+}
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll
index 5e0160bd28..4db68bd182 100644
--- a/test/CodeGen/X86/vec_floor.ll
+++ b/test/CodeGen/X86/vec_floor.ll
@@ -36,3 +36,147 @@ define <8 x float> @floor_v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+ ; CHECK: ceil_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+ ; CHECK: ceil_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+ ; CHECK: ceil_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+ ; CHECK: ceil_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+ ; CHECK: trunc_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+ ; CHECK: trunc_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+ ; CHECK: trunc_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+ ; CHECK: trunc_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+
+define <2 x double> @rint_v2f64(<2 x double> %p)
+{
+ ; CHECK: rint_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
+
+define <4 x float> @rint_v4f32(<4 x float> %p)
+{
+ ; CHECK: rint_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
+
+define <4 x double> @rint_v4f64(<4 x double> %p)
+{
+ ; CHECK: rint_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
+
+define <8 x float> @rint_v8f32(<8 x float> %p)
+{
+ ; CHECK: rint_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
+
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+ ; CHECK: nearbyint_v2f64
+ ; CHECK: vroundpd
+ %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+ ret <2 x double> %t
+}
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+ ; CHECK: nearbyint_v4f32
+ ; CHECK: vroundps
+ %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+ ret <4 x float> %t
+}
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+ ; CHECK: nearbyint_v4f64
+ ; CHECK: vroundpd
+ %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+ ret <4 x double> %t
+}
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+ ; CHECK: nearbyint_v8f32
+ ; CHECK: vroundps
+ %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+ ret <8 x float> %t
+}
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
diff --git a/test/CodeGen/XCore/aliases.ll b/test/CodeGen/XCore/aliases.ll
new file mode 100644
index 0000000000..d83b246a55
--- /dev/null
+++ b/test/CodeGen/XCore/aliases.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+declare void @a_val() nounwind
+@b_val = external constant i32, section ".cp.rodata"
+@c_val = external global i32
+
+@a = alias void ()* @a_val
+@b = alias i32* @b_val
+@c = alias i32* @c_val
+
+; CHECK: a_addr:
+; CHECK: ldap r11, a
+; CHECK: retsp
+define void ()* @a_addr() nounwind {
+entry:
+ ret void ()* @a
+}
+
+; CHECK: b_addr:
+; CHECK: ldaw r11, cp[b]
+; CHECK: retsp
+define i32 *@b_addr() nounwind {
+entry:
+ ret i32* @b
+}
+
+; CHECK: c_addr:
+; CHECK: ldaw r0, dp[c]
+; CHECK: retsp
+define i32 *@c_addr() nounwind {
+entry:
+ ret i32* @c
+}
diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg
index af3d13f746..fc29f651aa 100644
--- a/test/ExecutionEngine/MCJIT/lit.local.cfg
+++ b/test/ExecutionEngine/MCJIT/lit.local.cfg
@@ -8,13 +8,17 @@ def getRoot(config):
root = getRoot(config)
targets = set(root.targets_to_build.split())
-if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets):
+if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets) | \
+ ('PowerPC' in targets):
config.unsupported = False
else:
config.unsupported = True
-if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips']:
+if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips', 'PowerPC']:
config.unsupported = True
if root.host_os in ['Darwin']:
config.unsupported = True
+
+if 'powerpc' in root.target_triple and not 'powerpc64' in root.target_triple:
+ config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
new file mode 100644
index 0000000000..272204c63c
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/simpletest-remote.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit %s > /dev/null
+; XFAIL: arm, mips
+
+define i32 @bar() {
+ ret i32 0
+}
+
+define i32 @main() {
+ %r = call i32 @bar( ) ; <i32> [#uses=1]
+ ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/stubs-remote.ll b/test/ExecutionEngine/MCJIT/stubs-remote.ll
new file mode 100644
index 0000000000..4c7684fd20
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/stubs-remote.ll
@@ -0,0 +1,36 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -disable-lazy-compilation=false %s
+; XFAIL: arm, mips
+
+define i32 @main() nounwind {
+entry:
+ call void @lazily_compiled_address_is_consistent()
+ ret i32 0
+}
+
+; Test PR3043: @test should have the same address before and after
+; it's JIT-compiled.
+@funcPtr = common global i1 ()* null, align 4
+@lcaic_failure = internal constant [46 x i8] c"@lazily_compiled_address_is_consistent failed\00"
+
+define void @lazily_compiled_address_is_consistent() nounwind {
+entry:
+ store i1 ()* @test, i1 ()** @funcPtr
+ %pass = tail call i1 @test() ; <i32> [#uses=1]
+ br i1 %pass, label %pass_block, label %fail_block
+pass_block:
+ ret void
+fail_block:
+ call i32 @puts(i8* getelementptr([46 x i8]* @lcaic_failure, i32 0, i32 0))
+ call void @exit(i32 1)
+ unreachable
+}
+
+define i1 @test() nounwind {
+entry:
+ %tmp = load i1 ()** @funcPtr
+ %eq = icmp eq i1 ()* %tmp, @test
+ ret i1 %eq
+}
+
+declare i32 @puts(i8*) noreturn
+declare void @exit(i32) noreturn
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
new file mode 100644
index 0000000000..285ce5cea1
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-remote.ll
@@ -0,0 +1,89 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -O0 -disable-lazy-compilation=false %s
+; XFAIL: arm, mips
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+;
+; int main()
+; {
+; zero_arr[zero_int + 5] = 40;
+;
+; if (zero_double < 1.0)
+; zero_arr[zero_int + 2] = 70;
+;
+; for (int i = 1; i < 10; ++i) {
+; zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+; }
+; return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @zero_int, align 4
+ %add = add nsw i32 %0, 5
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+ store i32 40, i32* %arrayidx, align 4
+ %1 = load double* @zero_double, align 8
+ %cmp = fcmp olt double %1, 1.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %2 = load i32* @zero_int, align 4
+ %add1 = add nsw i32 %2, 2
+ %idxprom2 = sext i32 %add1 to i64
+ %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+ store i32 70, i32* %arrayidx3, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ store i32 1, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %if.end
+ %3 = load i32* %i, align 4
+ %cmp4 = icmp slt i32 %3, 10
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %4 = load i32* %i, align 4
+ %sub = sub nsw i32 %4, 1
+ %idxprom5 = sext i32 %sub to i64
+ %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+ %5 = load i32* %arrayidx6, align 4
+ %6 = load i32* %i, align 4
+ %idxprom7 = sext i32 %6 to i64
+ %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+ %7 = load i32* %arrayidx8, align 4
+ %add9 = add nsw i32 %5, %7
+ %8 = load i32* %i, align 4
+ %idxprom10 = sext i32 %8 to i64
+ %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+ store i32 %add9, i32* %arrayidx11, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %9 = load i32* %i, align 4
+ %inc = add nsw i32 %9, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+ %cmp12 = icmp eq i32 %10, 110
+ %cond = select i1 %cmp12, i32 0, i32 -1
+ ret i32 %cond
+}
diff --git a/test/ExecutionEngine/MCJIT/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
new file mode 100644
index 0000000000..a1591d0fc4
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-data-align-remote.ll
@@ -0,0 +1,16 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -O0 %s
+; XFAIL: arm, mips
+
+; Check that a variable is always aligned as specified.
+
+@var = global i32 0, align 32
+define i32 @main() {
+ %addr = ptrtoint i32* @var to i64
+ %mask = and i64 %addr, 31
+ %tst = icmp eq i64 %mask, 0
+ br i1 %tst, label %good, label %bad
+good:
+ ret i32 0
+bad:
+ ret i32 1
+}
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
new file mode 100644
index 0000000000..69c73b99c9
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs-remote.ll
@@ -0,0 +1,22 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit %s > /dev/null
+; XFAIL: arm, mips
+
+define double @test(double* %DP, double %Arg) {
+ %D = load double* %DP ; <double> [#uses=1]
+ %V = fadd double %D, 1.000000e+00 ; <double> [#uses=2]
+ %W = fsub double %V, %V ; <double> [#uses=3]
+ %X = fmul double %W, %W ; <double> [#uses=2]
+ %Y = fdiv double %X, %X ; <double> [#uses=2]
+ %Q = fadd double %Y, %Arg ; <double> [#uses=1]
+ %R = bitcast double %Q to double ; <double> [#uses=1]
+ store double %Q, double* %DP
+ ret double %Y
+}
+
+define i32 @main() {
+ %X = alloca double ; <double*> [#uses=2]
+ store double 0.000000e+00, double* %X
+ call double @test( double* %X, double 2.000000e+00 ) ; <double>:1 [#uses=0]
+ ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
new file mode 100644
index 0000000000..fbe9118d53
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit %s > /dev/null
+@var = global i32 1, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @ctor_func }]
+@llvm.global_dtors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @dtor_func }]
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @var, align 4
+ ret i32 %0
+}
+
+define internal void @ctor_func() section ".text.startup" {
+entry:
+ store i32 0, i32* @var, align 4
+ ret void
+}
+
+define internal void @dtor_func() section ".text.startup" {
+entry:
+ ret void
+}
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
new file mode 100644
index 0000000000..8b7c83e018
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-remote.ll
@@ -0,0 +1,35 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit %s > /dev/null
+; XFAIL: arm, mips
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 49
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* @count, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* @count, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %2 = load i32* %i, align 4
+ %inc1 = add nsw i32 %2, 1
+ store i32 %inc1, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %3 = load i32* @count, align 4
+ %sub = sub nsw i32 %3, 50
+ ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
new file mode 100644
index 0000000000..773e4a195f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-remote.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -remote-mcjit -O0 %s
+; XFAIL: arm, mips
+
+@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
+@ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
+@.str1 = private unnamed_addr constant [6 x i8] c"data2\00", align 1
+@ptr2 = global i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), align 4
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+ %0 = load i8** @ptr, align 4
+ %1 = load i8** @ptr2, align 4
+ %cmp = icmp eq i8* %0, %1
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
+
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index 19eebc0ac7..f0343263db 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -1 +1,12 @@
config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+if root.host_arch in ['PowerPC']:
+ config.unsupported = True
+
diff --git a/test/FileCheck/lit.local.cfg b/test/FileCheck/lit.local.cfg
new file mode 100644
index 0000000000..ee25f56231
--- /dev/null
+++ b/test/FileCheck/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.txt']
diff --git a/test/FileCheck/next-no-match.txt b/test/FileCheck/next-no-match.txt
new file mode 100644
index 0000000000..908615b248
--- /dev/null
+++ b/test/FileCheck/next-no-match.txt
@@ -0,0 +1,9 @@
+// RUN: not FileCheck -input-file %s %s
+
+foo
+bar
+; CHECK: foo
+baz
+; CHECK-NEXT: baz
+
+
diff --git a/test/FileCheck/regex-no-match.txt b/test/FileCheck/regex-no-match.txt
new file mode 100644
index 0000000000..f80ac12911
--- /dev/null
+++ b/test/FileCheck/regex-no-match.txt
@@ -0,0 +1,5 @@
+// RUN: not FileCheck -input-file %s %s
+
+foobar
+; CHECK: fooba{{[a-b]}}
+
diff --git a/test/FileCheck/simple-var-capture.txt b/test/FileCheck/simple-var-capture.txt
new file mode 100644
index 0000000000..c0214d9017
--- /dev/null
+++ b/test/FileCheck/simple-var-capture.txt
@@ -0,0 +1,12 @@
+// RUN: FileCheck -input-file %s %s
+
+op1 r1
+op2 r1, r2
+; CHECK: op1 [[REG:r[0-9]+]]
+; CHECK-NEXT: op2 [[REG]]
+
+op3 r16, r18, r21
+op4 r30, r18, r21
+; CHECK: op3 {{r[0-9]+}}, [[REGa:r[0-9]+]], [[REGb:r[0-9]+]]
+; CHECK-NEXT: op4 {{r[0-9]+}}, [[REGa]], [[REGb]]
+
diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
index 472551654e..c11a0498c3 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -1,11 +1,15 @@
; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-@xxx = global i32 0, align 4
+@xxx = internal global i32 0, align 4 ; With dynamic initializer.
+@XXX = global i32 0, align 4 ; With dynamic initializer.
+@yyy = internal global i32 0, align 4 ; W/o dynamic initializer.
+@YYY = global i32 0, align 4 ; W/o dynamic initializer.
; Clang will emit the following metadata identifying @xxx as dynamically
; initialized.
!0 = metadata !{i32* @xxx}
-!llvm.asan.dynamically_initialized_globals = !{!0}
+!1 = metadata !{i32* @XXX}
+!llvm.asan.dynamically_initialized_globals = !{!0, !1}
define i32 @initializer() uwtable {
entry:
@@ -34,3 +38,40 @@ entry:
; CHECK: call void @__cxx_global_var_init
; CHECK: call void @__asan_after_dynamic_init
; CHECK: ret
+
+; Check that xxx is instrumented.
+define void @touch_xxx() address_safety {
+ store i32 0, i32 *@xxx, align 4
+ ret void
+; CHECK: define void @touch_xxx
+; CHECK: call void @__asan_report_store4
+; CHECK: ret void
+}
+
+; Check that XXX is instrumented.
+define void @touch_XXX() address_safety {
+ store i32 0, i32 *@XXX, align 4
+ ret void
+; CHECK: define void @touch_XXX
+; CHECK: call void @__asan_report_store4
+; CHECK: ret void
+}
+
+
+; Check that yyy is NOT instrumented (as it does not have dynamic initializer).
+define void @touch_yyy() address_safety {
+ store i32 0, i32 *@yyy, align 4
+ ret void
+; CHECK: define void @touch_yyy
+; CHECK-NOT: call void @__asan_report_store4
+; CHECK: ret void
+}
+
+; Check that YYY is NOT instrumented (as it does not have dynamic initializer).
+define void @touch_YYY() address_safety {
+ store i32 0, i32 *@YYY, align 4
+ ret void
+; CHECK: define void @touch_YYY
+; CHECK-NOT: call void @__asan_report_store4
+; CHECK: ret void
+}
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
index 107dbdc0f2..d9fc222f12 100644
--- a/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -312,7 +312,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_monotonic
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 0, i32 0)
define void @atomic8_cas_acquire(i8* %a) nounwind uwtable {
entry:
@@ -320,7 +320,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_acquire
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 2, i32 2)
define void @atomic8_cas_release(i8* %a) nounwind uwtable {
entry:
@@ -328,7 +328,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_release
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 3, i32 0)
define void @atomic8_cas_acq_rel(i8* %a) nounwind uwtable {
entry:
@@ -336,7 +336,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_acq_rel
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 4, i32 2)
define void @atomic8_cas_seq_cst(i8* %a) nounwind uwtable {
entry:
@@ -344,7 +344,7 @@ entry:
ret void
}
; CHECK: atomic8_cas_seq_cst
-; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5)
+; CHECK: call i8 @__tsan_atomic8_compare_exchange_val(i8* %a, i8 0, i8 1, i32 5, i32 5)
define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable {
entry:
@@ -656,7 +656,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_monotonic
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 0, i32 0)
define void @atomic16_cas_acquire(i16* %a) nounwind uwtable {
entry:
@@ -664,7 +664,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_acquire
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 2, i32 2)
define void @atomic16_cas_release(i16* %a) nounwind uwtable {
entry:
@@ -672,7 +672,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_release
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 3, i32 0)
define void @atomic16_cas_acq_rel(i16* %a) nounwind uwtable {
entry:
@@ -680,7 +680,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_acq_rel
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 4, i32 2)
define void @atomic16_cas_seq_cst(i16* %a) nounwind uwtable {
entry:
@@ -688,7 +688,7 @@ entry:
ret void
}
; CHECK: atomic16_cas_seq_cst
-; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5)
+; CHECK: call i16 @__tsan_atomic16_compare_exchange_val(i16* %a, i16 0, i16 1, i32 5, i32 5)
define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable {
entry:
@@ -1000,7 +1000,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_monotonic
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 0, i32 0)
define void @atomic32_cas_acquire(i32* %a) nounwind uwtable {
entry:
@@ -1008,7 +1008,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_acquire
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 2, i32 2)
define void @atomic32_cas_release(i32* %a) nounwind uwtable {
entry:
@@ -1016,7 +1016,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_release
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 3, i32 0)
define void @atomic32_cas_acq_rel(i32* %a) nounwind uwtable {
entry:
@@ -1024,7 +1024,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_acq_rel
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 4, i32 2)
define void @atomic32_cas_seq_cst(i32* %a) nounwind uwtable {
entry:
@@ -1032,7 +1032,7 @@ entry:
ret void
}
; CHECK: atomic32_cas_seq_cst
-; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5)
+; CHECK: call i32 @__tsan_atomic32_compare_exchange_val(i32* %a, i32 0, i32 1, i32 5, i32 5)
define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable {
entry:
@@ -1344,7 +1344,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_monotonic
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 0, i32 0)
define void @atomic64_cas_acquire(i64* %a) nounwind uwtable {
entry:
@@ -1352,7 +1352,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_acquire
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 2, i32 2)
define void @atomic64_cas_release(i64* %a) nounwind uwtable {
entry:
@@ -1360,7 +1360,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_release
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 3, i32 0)
define void @atomic64_cas_acq_rel(i64* %a) nounwind uwtable {
entry:
@@ -1368,7 +1368,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_acq_rel
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 4, i32 2)
define void @atomic64_cas_seq_cst(i64* %a) nounwind uwtable {
entry:
@@ -1376,7 +1376,7 @@ entry:
ret void
}
; CHECK: atomic64_cas_seq_cst
-; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5)
+; CHECK: call i64 @__tsan_atomic64_compare_exchange_val(i64* %a, i64 0, i64 1, i32 5, i32 5)
define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable {
entry:
@@ -1688,7 +1688,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_monotonic
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 0, i32 0)
define void @atomic128_cas_acquire(i128* %a) nounwind uwtable {
entry:
@@ -1696,7 +1696,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_acquire
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 2, i32 2)
define void @atomic128_cas_release(i128* %a) nounwind uwtable {
entry:
@@ -1704,7 +1704,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_release
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 3, i32 0)
define void @atomic128_cas_acq_rel(i128* %a) nounwind uwtable {
entry:
@@ -1712,7 +1712,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_acq_rel
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 4, i32 2)
define void @atomic128_cas_seq_cst(i128* %a) nounwind uwtable {
entry:
@@ -1720,7 +1720,7 @@ entry:
ret void
}
; CHECK: atomic128_cas_seq_cst
-; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5)
+; CHECK: call i128 @__tsan_atomic128_compare_exchange_val(i128* %a, i128 0, i128 1, i32 5, i32 5)
define void @atomic_signal_fence_acquire() nounwind uwtable {
entry:
diff --git a/test/JitListener/lit.local.cfg b/test/JitListener/lit.local.cfg
new file mode 100644
index 0000000000..a5aa6de182
--- /dev/null
+++ b/test/JitListener/lit.local.cfg
@@ -0,0 +1,11 @@
+config.suffixes = ['.ll']
+
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+if not root.llvm_use_intel_jitevents == "ON":
+ config.unsupported = True
+
diff --git a/test/JitListener/test-common-symbols.ll b/test/JitListener/test-common-symbols.ll
new file mode 100644
index 0000000000..5f460ff686
--- /dev/null
+++ b/test/JitListener/test-common-symbols.ll
@@ -0,0 +1,113 @@
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; CHECK: Method load [1]: main, Size = 164
+; CHECK: Method unload [1]
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @zero_int, align 4, !dbg !21
+ %add = add nsw i32 %0, 5, !dbg !21
+ %idxprom = sext i32 %add to i64, !dbg !21
+ %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom, !dbg !21
+ store i32 40, i32* %arrayidx, align 4, !dbg !21
+ %1 = load double* @zero_double, align 8, !dbg !23
+ %cmp = fcmp olt double %1, 1.000000e+00, !dbg !23
+ br i1 %cmp, label %if.then, label %if.end, !dbg !23
+
+if.then: ; preds = %entry
+ %2 = load i32* @zero_int, align 4, !dbg !24
+ %add1 = add nsw i32 %2, 2, !dbg !24
+ %idxprom2 = sext i32 %add1 to i64, !dbg !24
+ %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2, !dbg !24
+ store i32 70, i32* %arrayidx3, align 4, !dbg !24
+ br label %if.end, !dbg !24
+
+if.end: ; preds = %if.then, %entry
+ call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !25), !dbg !27
+ store i32 1, i32* %i, align 4, !dbg !28
+ br label %for.cond, !dbg !28
+
+for.cond: ; preds = %for.inc, %if.end
+ %3 = load i32* %i, align 4, !dbg !28
+ %cmp4 = icmp slt i32 %3, 10, !dbg !28
+ br i1 %cmp4, label %for.body, label %for.end, !dbg !28
+
+for.body: ; preds = %for.cond
+ %4 = load i32* %i, align 4, !dbg !29
+ %sub = sub nsw i32 %4, 1, !dbg !29
+ %idxprom5 = sext i32 %sub to i64, !dbg !29
+ %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5, !dbg !29
+ %5 = load i32* %arrayidx6, align 4, !dbg !29
+ %6 = load i32* %i, align 4, !dbg !29
+ %idxprom7 = sext i32 %6 to i64, !dbg !29
+ %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7, !dbg !29
+ %7 = load i32* %arrayidx8, align 4, !dbg !29
+ %add9 = add nsw i32 %5, %7, !dbg !29
+ %8 = load i32* %i, align 4, !dbg !29
+ %idxprom10 = sext i32 %8 to i64, !dbg !29
+ %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10, !dbg !29
+ store i32 %add9, i32* %arrayidx11, align 4, !dbg !29
+ br label %for.inc, !dbg !31
+
+for.inc: ; preds = %for.body
+ %9 = load i32* %i, align 4, !dbg !32
+ %inc = add nsw i32 %9, 1, !dbg !32
+ store i32 %inc, i32* %i, align 4, !dbg !32
+ br label %for.cond, !dbg !32
+
+for.end: ; preds = %for.cond
+ %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4, !dbg !33
+ %cmp12 = icmp eq i32 %10, 110, !dbg !33
+ %cond = select i1 %cmp12, i32 0, i32 -1, !dbg !33
+ ret i32 %cond, !dbg !33
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build", metadata !"clang version 3.1 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{metadata !14, metadata !15, metadata !17}
+!14 = metadata !{i32 720948, i32 0, null, metadata !"zero_int", metadata !"zero_int", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @zero_int} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 720948, i32 0, null, metadata !"zero_double", metadata !"zero_double", metadata !"", metadata !6, i32 2, metadata !16, i32 0, i32 1, double* @zero_double} ; [ DW_TAG_variable ]
+!16 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 720929, i64 0, i64 9} ; [ DW_TAG_subrange_type ]
+!21 = metadata !{i32 7, i32 5, metadata !22, null}
+!22 = metadata !{i32 720907, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 9, i32 5, metadata !22, null}
+!24 = metadata !{i32 10, i32 9, metadata !22, null}
+!25 = metadata !{i32 721152, metadata !26, metadata !"i", metadata !6, i32 12, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!26 = metadata !{i32 720907, metadata !22, i32 12, i32 5, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 12, i32 14, metadata !26, null}
+!28 = metadata !{i32 12, i32 19, metadata !26, null}
+!29 = metadata !{i32 13, i32 9, metadata !30, null}
+!30 = metadata !{i32 720907, metadata !26, i32 12, i32 34, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!31 = metadata !{i32 14, i32 5, metadata !30, null}
+!32 = metadata !{i32 12, i32 29, metadata !26, null}
+!33 = metadata !{i32 15, i32 5, metadata !22, null}
diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll
new file mode 100644
index 0000000000..5a4bf1f374
--- /dev/null
+++ b/test/JitListener/test-inline.ll
@@ -0,0 +1,219 @@
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 165
+; CHECK: Method load [2]: _Z3food, Size = 39
+; CHECK: Method load [3]: main, Size = 146
+; CHECK: Method unload [1]
+; CHECK: Method unload [2]
+; CHECK: Method unload [3]
+
+; ModuleID = 'test-inline.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.char_struct = type { i8, [2 x i8] }
+
+@compound_char = global %struct.char_struct zeroinitializer, align 1
+@_ZZ4mainE1d = private unnamed_addr constant [2 x [2 x double]] [[2 x double] [double 0.000000e+00, double 1.000000e+00], [2 x double] [double 2.000000e+00, double 3.000000e+00]], align 16
+
+define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) uwtable {
+entry:
+ %pf.addr = alloca float*, align 8
+ %ppd.addr = alloca [2 x double]*, align 8
+ %s.addr = alloca %struct.char_struct*, align 8
+ %ppn.addr = alloca i32**, align 8
+ %us.addr = alloca i16, align 2
+ %l.addr = alloca i64, align 8
+ %result = alloca double, align 8
+ %result2 = alloca i32, align 4
+ store float* %pf, float** %pf.addr, align 8
+ call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !32), !dbg !35
+ store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
+ call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !36), !dbg !39
+ store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !40), !dbg !42
+ store i32** %ppn, i32*** %ppn.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !43), !dbg !46
+ store i16 %us, i16* %us.addr, align 2
+ call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !47), !dbg !49
+ store i64 %l, i64* %l.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !50), !dbg !53
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !54), !dbg !56
+ %0 = load float** %pf.addr, align 8, !dbg !57
+ %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !57
+ %1 = load float* %arrayidx, !dbg !57
+ %conv = fpext float %1 to double, !dbg !57
+ %2 = load [2 x double]** %ppd.addr, align 8, !dbg !57
+ %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !57
+ %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !57
+ %3 = load double* %arrayidx2, !dbg !57
+ %mul = fmul double %conv, %3, !dbg !57
+ %4 = load %struct.char_struct** %s.addr, !dbg !57
+ %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !57
+ %5 = load i8* %c, align 1, !dbg !57
+ %conv3 = sext i8 %5 to i32, !dbg !57
+ %conv4 = sitofp i32 %conv3 to double, !dbg !57
+ %mul5 = fmul double %mul, %conv4, !dbg !57
+ %6 = load i16* %us.addr, align 2, !dbg !57
+ %conv6 = zext i16 %6 to i32, !dbg !57
+ %conv7 = sitofp i32 %conv6 to double, !dbg !57
+ %mul8 = fmul double %mul5, %conv7, !dbg !57
+ %7 = load i64* %l.addr, align 8, !dbg !57
+ %conv9 = uitofp i64 %7 to double, !dbg !57
+ %mul10 = fmul double %mul8, %conv9, !dbg !57
+ store double %mul10, double* %result, align 8, !dbg !57
+ call void @llvm.dbg.declare(metadata !{i32* %result2}, metadata !58), !dbg !59
+ %8 = load double* %result, align 8, !dbg !60
+ %call = call i32 @_Z3food(double %8), !dbg !60
+ store i32 %call, i32* %result2, align 4, !dbg !60
+ %9 = load i32* %result2, align 4, !dbg !61
+ %conv11 = sitofp i32 %9 to double, !dbg !61
+ ret double %conv11, !dbg !61
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr i32 @_Z3food(double %input) nounwind uwtable inlinehint {
+entry:
+ %input.addr = alloca double, align 8
+ store double %input, double* %input.addr, align 8
+ call void @llvm.dbg.declare(metadata !{double* %input.addr}, metadata !62), !dbg !63
+ %0 = load double* %input.addr, align 8, !dbg !64
+ %div = fdiv double %0, 3.000000e+00, !dbg !64
+ %add = fadd double %div, 1.000000e+00, !dbg !64
+ %conv = fptosi double %add to i32, !dbg !64
+ ret i32 %conv, !dbg !64
+}
+
+define i32 @main(i32 %argc, i8** %argv) uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %s = alloca %struct.char_struct, align 1
+ %f = alloca float, align 4
+ %d = alloca [2 x [2 x double]], align 16
+ %result = alloca double, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !66), !dbg !67
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !68), !dbg !71
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !72), !dbg !74
+ call void @llvm.dbg.declare(metadata !{float* %f}, metadata !75), !dbg !76
+ store float 0.000000e+00, float* %f, align 4, !dbg !77
+ call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !78), !dbg !81
+ %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !82
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !82
+ %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !83
+ store i8 97, i8* %c, align 1, !dbg !83
+ %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !84
+ %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !84
+ store i8 48, i8* %arrayidx, align 1, !dbg !84
+ %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !85
+ %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !85
+ store i8 49, i8* %arrayidx2, align 1, !dbg !85
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !86), !dbg !87
+ %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !88
+ %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !88
+ store double %call, double* %result, align 8, !dbg !88
+ %1 = load double* %result, align 8, !dbg !89
+ %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !89
+ %cond = select i1 %cmp, i32 0, i32 -1, !dbg !89
+ ret i32 %cond, !dbg !89
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"test-inline.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", metadata !"clang version 3.0 (branches/release_30 36797)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !17} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !12, metadata !16}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 33, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"test-inline.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 40, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3food", metadata !6, i32 28, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (double)* @_Z3food, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 720948, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !20, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 720898, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, null} ; [ DW_TAG_class_type ]
+!21 = metadata !{metadata !22, metadata !24, metadata !28}
+!22 = metadata !{i32 720909, metadata !20, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !23} ; [ DW_TAG_member ]
+!23 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 720909, metadata !20, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !25} ; [ DW_TAG_member ]
+!25 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !23, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 720929, i64 0, i64 1} ; [ DW_TAG_subrange_type ]
+!28 = metadata !{i32 720942, i32 0, metadata !20, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !10} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!30 = metadata !{null, metadata !31}
+!31 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 721153, metadata !5, metadata !"pf", metadata !6, i32 16777248, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 720932, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!35 = metadata !{i32 32, i32 31, metadata !5, null}
+!36 = metadata !{i32 721153, metadata !5, metadata !"ppd", metadata !6, i32 33554464, metadata !37, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!37 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !38} ; [ DW_TAG_pointer_type ]
+!38 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!39 = metadata !{i32 32, i32 42, metadata !5, null}
+!40 = metadata !{i32 721153, metadata !5, metadata !"s", metadata !6, i32 50331680, metadata !41, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!41 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_reference_type ]
+!42 = metadata !{i32 32, i32 72, metadata !5, null}
+!43 = metadata !{i32 721153, metadata !5, metadata !"ppn", metadata !6, i32 67108896, metadata !44, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!44 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !45} ; [ DW_TAG_pointer_type ]
+!45 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
+!46 = metadata !{i32 32, i32 81, metadata !5, null}
+!47 = metadata !{i32 721153, metadata !5, metadata !"us", metadata !6, i32 83886112, metadata !48, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!48 = metadata !{i32 720932, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!49 = metadata !{i32 32, i32 105, metadata !5, null}
+!50 = metadata !{i32 721153, metadata !5, metadata !"l", metadata !6, i32 100663328, metadata !51, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!51 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !52} ; [ DW_TAG_const_type ]
+!52 = metadata !{i32 720932, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!53 = metadata !{i32 32, i32 135, metadata !5, null}
+!54 = metadata !{i32 721152, metadata !55, metadata !"result", metadata !6, i32 34, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!55 = metadata !{i32 720907, metadata !5, i32 33, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!56 = metadata !{i32 34, i32 10, metadata !55, null}
+!57 = metadata !{i32 34, i32 51, metadata !55, null}
+!58 = metadata !{i32 721152, metadata !55, metadata !"result2", metadata !6, i32 35, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!59 = metadata !{i32 35, i32 7, metadata !55, null}
+!60 = metadata !{i32 35, i32 17, metadata !55, null}
+!61 = metadata !{i32 36, i32 3, metadata !55, null}
+!62 = metadata !{i32 721153, metadata !16, metadata !"input", metadata !6, i32 16777243, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!63 = metadata !{i32 27, i32 23, metadata !16, null}
+!64 = metadata !{i32 29, i32 3, metadata !65, null}
+!65 = metadata !{i32 720907, metadata !16, i32 28, i32 1, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!66 = metadata !{i32 721153, metadata !12, metadata !"argc", metadata !6, i32 16777255, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!67 = metadata !{i32 39, i32 14, metadata !12, null}
+!68 = metadata !{i32 721153, metadata !12, metadata !"argv", metadata !6, i32 33554471, metadata !69, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!69 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
+!70 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!71 = metadata !{i32 39, i32 26, metadata !12, null}
+!72 = metadata !{i32 721152, metadata !73, metadata !"s", metadata !6, i32 41, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!73 = metadata !{i32 720907, metadata !12, i32 40, i32 1, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!74 = metadata !{i32 41, i32 22, metadata !73, null}
+!75 = metadata !{i32 721152, metadata !73, metadata !"f", metadata !6, i32 42, metadata !34, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!76 = metadata !{i32 42, i32 9, metadata !73, null}
+!77 = metadata !{i32 42, i32 16, metadata !73, null}
+!78 = metadata !{i32 721152, metadata !73, metadata !"d", metadata !6, i32 43, metadata !79, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!79 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !80, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!80 = metadata !{metadata !27, metadata !27}
+!81 = metadata !{i32 43, i32 10, metadata !73, null}
+!82 = metadata !{i32 43, i32 38, metadata !73, null}
+!83 = metadata !{i32 45, i32 3, metadata !73, null}
+!84 = metadata !{i32 46, i32 3, metadata !73, null}
+!85 = metadata !{i32 47, i32 3, metadata !73, null}
+!86 = metadata !{i32 721152, metadata !73, metadata !"result", metadata !6, i32 49, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!87 = metadata !{i32 49, i32 10, metadata !73, null}
+!88 = metadata !{i32 49, i32 19, metadata !73, null}
+!89 = metadata !{i32 50, i32 3, metadata !73, null}
diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll
new file mode 100644
index 0000000000..b90a3ea356
--- /dev/null
+++ b/test/JitListener/test-parameters.ll
@@ -0,0 +1,205 @@
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
+; CHECK: Method load [2]: _Z3foov, Size = 3
+; CHECK: Method load [3]: main, Size = 146
+; CHECK: Method unload [1]
+; CHECK: Method unload [2]
+; CHECK: Method unload [3]
+
+; ModuleID = 'test-parameters.bc'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.char_struct = type { i8, [2 x i8] }
+
+@compound_char = global %struct.char_struct zeroinitializer, align 1
+@_ZZ4mainE1d = private unnamed_addr constant [2 x [2 x double]] [[2 x double] [double 0.000000e+00, double 1.000000e+00], [2 x double] [double 2.000000e+00, double 3.000000e+00]], align 16
+
+define i32 @_Z3foov() nounwind uwtable {
+entry:
+ ret i32 0, !dbg !32
+}
+
+define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) nounwind uwtable {
+entry:
+ %pf.addr = alloca float*, align 8
+ %ppd.addr = alloca [2 x double]*, align 8
+ %s.addr = alloca %struct.char_struct*, align 8
+ %ppn.addr = alloca i32**, align 8
+ %us.addr = alloca i16, align 2
+ %l.addr = alloca i64, align 8
+ %result = alloca double, align 8
+ store float* %pf, float** %pf.addr, align 8
+ call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !34), !dbg !37
+ store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
+ call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !38), !dbg !41
+ store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !42), !dbg !44
+ store i32** %ppn, i32*** %ppn.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !45), !dbg !48
+ store i16 %us, i16* %us.addr, align 2
+ call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !49), !dbg !51
+ store i64 %l, i64* %l.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !52), !dbg !55
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !56), !dbg !58
+ %0 = load float** %pf.addr, align 8, !dbg !59
+ %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !59
+ %1 = load float* %arrayidx, !dbg !59
+ %conv = fpext float %1 to double, !dbg !59
+ %2 = load [2 x double]** %ppd.addr, align 8, !dbg !59
+ %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !59
+ %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !59
+ %3 = load double* %arrayidx2, !dbg !59
+ %mul = fmul double %conv, %3, !dbg !59
+ %4 = load %struct.char_struct** %s.addr, !dbg !59
+ %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !59
+ %5 = load i8* %c, align 1, !dbg !59
+ %conv3 = sext i8 %5 to i32, !dbg !59
+ %conv4 = sitofp i32 %conv3 to double, !dbg !59
+ %mul5 = fmul double %mul, %conv4, !dbg !59
+ %6 = load i16* %us.addr, align 2, !dbg !59
+ %conv6 = zext i16 %6 to i32, !dbg !59
+ %conv7 = sitofp i32 %conv6 to double, !dbg !59
+ %mul8 = fmul double %mul5, %conv7, !dbg !59
+ %7 = load i64* %l.addr, align 8, !dbg !59
+ %conv9 = uitofp i64 %7 to double, !dbg !59
+ %mul10 = fmul double %mul8, %conv9, !dbg !59
+ %call = call i32 @_Z3foov(), !dbg !60
+ %conv11 = sitofp i32 %call to double, !dbg !60
+ %add = fadd double %mul10, %conv11, !dbg !60
+ store double %add, double* %result, align 8, !dbg !60
+ %8 = load double* %result, align 8, !dbg !61
+ ret double %8, !dbg !61
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %s = alloca %struct.char_struct, align 1
+ %f = alloca float, align 4
+ %d = alloca [2 x [2 x double]], align 16
+ %result = alloca double, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !62), !dbg !63
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !64), !dbg !67
+ call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !68), !dbg !70
+ call void @llvm.dbg.declare(metadata !{float* %f}, metadata !71), !dbg !72
+ store float 0.000000e+00, float* %f, align 4, !dbg !73
+ call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !74), !dbg !77
+ %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !78
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !78
+ %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !79
+ store i8 97, i8* %c, align 1, !dbg !79
+ %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !80
+ %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !80
+ store i8 48, i8* %arrayidx, align 1, !dbg !80
+ %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !81
+ %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !81
+ store i8 49, i8* %arrayidx2, align 1, !dbg !81
+ call void @llvm.dbg.declare(metadata !{double* %result}, metadata !82), !dbg !83
+ %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !84
+ %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !84
+ store double %call, double* %result, align 8, !dbg !84
+ %1 = load double* %result, align 8, !dbg !85
+ %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !85
+ %cond = select i1 %cmp, i32 0, i32 -1, !dbg !85
+ ret i32 %cond, !dbg !85
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"test-parameters.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", metadata !"clang version 3.0 (branches/release_30 36797)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !17} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !12, metadata !16}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !6, i32 28, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"test-parameters.cpp", metadata !"/home/athirumurthi/dev/opencl-mc/build/RH64/Debug/backend/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 720942, i32 0, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", metadata !6, i32 33, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 720932, null, metadata !"double", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 39, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 720948, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !20, i32 0, i32 1, %struct.char_struct* @compound_char} ; [ DW_TAG_variable ]
+!20 = metadata !{i32 720898, null, metadata !"char_struct", metadata !6, i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, null} ; [ DW_TAG_class_type ]
+!21 = metadata !{metadata !22, metadata !24, metadata !28}
+!22 = metadata !{i32 720909, metadata !20, metadata !"c", metadata !6, i32 23, i64 8, i64 8, i64 0, i32 0, metadata !23} ; [ DW_TAG_member ]
+!23 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!24 = metadata !{i32 720909, metadata !20, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !25} ; [ DW_TAG_member ]
+!25 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !23, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 720929, i64 0, i64 1} ; [ DW_TAG_subrange_type ]
+!28 = metadata !{i32 720942, i32 0, metadata !20, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !10} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!30 = metadata !{null, metadata !31}
+!31 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
+!32 = metadata !{i32 29, i32 3, metadata !33, null}
+!33 = metadata !{i32 720907, metadata !5, i32 28, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!34 = metadata !{i32 721153, metadata !12, metadata !"pf", metadata !6, i32 16777248, metadata !35, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!35 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !36} ; [ DW_TAG_pointer_type ]
+!36 = metadata !{i32 720932, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!37 = metadata !{i32 32, i32 31, metadata !12, null}
+!38 = metadata !{i32 721153, metadata !12, metadata !"ppd", metadata !6, i32 33554464, metadata !39, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!39 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !40} ; [ DW_TAG_pointer_type ]
+!40 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !15, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!41 = metadata !{i32 32, i32 42, metadata !12, null}
+!42 = metadata !{i32 721153, metadata !12, metadata !"s", metadata !6, i32 50331680, metadata !43, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!43 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_reference_type ]
+!44 = metadata !{i32 32, i32 72, metadata !12, null}
+!45 = metadata !{i32 721153, metadata !12, metadata !"ppn", metadata !6, i32 67108896, metadata !46, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!46 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ]
+!47 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!48 = metadata !{i32 32, i32 81, metadata !12, null}
+!49 = metadata !{i32 721153, metadata !12, metadata !"us", metadata !6, i32 83886112, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!50 = metadata !{i32 720932, null, metadata !"unsigned short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!51 = metadata !{i32 32, i32 105, metadata !12, null}
+!52 = metadata !{i32 721153, metadata !12, metadata !"l", metadata !6, i32 100663328, metadata !53, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!53 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !54} ; [ DW_TAG_const_type ]
+!54 = metadata !{i32 720932, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!55 = metadata !{i32 32, i32 135, metadata !12, null}
+!56 = metadata !{i32 721152, metadata !57, metadata !"result", metadata !6, i32 34, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!57 = metadata !{i32 720907, metadata !12, i32 33, i32 1, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!58 = metadata !{i32 34, i32 10, metadata !57, null}
+!59 = metadata !{i32 34, i32 59, metadata !57, null}
+!60 = metadata !{i32 34, i32 54, metadata !57, null}
+!61 = metadata !{i32 35, i32 3, metadata !57, null}
+!62 = metadata !{i32 721153, metadata !16, metadata !"argc", metadata !6, i32 16777254, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!63 = metadata !{i32 38, i32 14, metadata !16, null}
+!64 = metadata !{i32 721153, metadata !16, metadata !"argv", metadata !6, i32 33554470, metadata !65, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!65 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !66} ; [ DW_TAG_pointer_type ]
+!66 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
+!67 = metadata !{i32 38, i32 26, metadata !16, null}
+!68 = metadata !{i32 721152, metadata !69, metadata !"s", metadata !6, i32 40, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!69 = metadata !{i32 720907, metadata !16, i32 39, i32 1, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!70 = metadata !{i32 40, i32 22, metadata !69, null}
+!71 = metadata !{i32 721152, metadata !69, metadata !"f", metadata !6, i32 41, metadata !36, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!72 = metadata !{i32 41, i32 9, metadata !69, null}
+!73 = metadata !{i32 41, i32 16, metadata !69, null}
+!74 = metadata !{i32 721152, metadata !69, metadata !"d", metadata !6, i32 42, metadata !75, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!75 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !15, metadata !76, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!76 = metadata !{metadata !27, metadata !27}
+!77 = metadata !{i32 42, i32 10, metadata !69, null}
+!78 = metadata !{i32 42, i32 38, metadata !69, null}
+!79 = metadata !{i32 44, i32 3, metadata !69, null}
+!80 = metadata !{i32 45, i32 3, metadata !69, null}
+!81 = metadata !{i32 46, i32 3, metadata !69, null}
+!82 = metadata !{i32 721152, metadata !69, metadata !"result", metadata !6, i32 48, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!83 = metadata !{i32 48, i32 10, metadata !69, null}
+!84 = metadata !{i32 48, i32 19, metadata !69, null}
+!85 = metadata !{i32 49, i32 3, metadata !69, null}
diff --git a/test/MC/COFF/weak-symbol-section-specification.ll b/test/MC/COFF/weak-symbol-section-specification.ll
new file mode 100644
index 0000000000..5049372959
--- /dev/null
+++ b/test/MC/COFF/weak-symbol-section-specification.ll
@@ -0,0 +1,23 @@
+; The purpose of this test is to verify that the weak linkage type is not ignored by the backend
+; when a section is explicitly specified.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+
+@a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
+
+; CHECK: Name = .data$a
+; CHECK-NEXT: VirtualSize = 0
+; CHECK-NEXT: VirtualAddress = 0
+; CHECK-NEXT: SizeOfRawData = {{[0-9]+}}
+; CHECK-NEXT: PointerToRawData = 0x{{[0-9A-F]+}}
+; CHECK-NEXT: PointerToRelocations = 0x0
+; CHECK-NEXT: PointerToLineNumbers = 0x0
+; CHECK-NEXT: NumberOfRelocations = 0
+; CHECK-NEXT: NumberOfLineNumbers = 0
+; CHECK-NEXT: Charateristics = 0x40401040
+; CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK-NEXT: IMAGE_SCN_LNK_COMDAT
+; CHECK-NEXT: IMAGE_SCN_ALIGN_8BYTES
+; CHECK-NEXT: IMAGE_SCN_MEM_READ
+; CHECK-NEXT: SectionData =
+; CHECK-NEXT: 00 00 00 00 00 00 00 00 - 00 00 00 00
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index 0a88c40839..38b1377661 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -1,67 +1,67 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0x67 0x4b 0x7c 0xcd
-
-# CHECK: daddu $26, $1, $11
-0x00 0x2b 0xd0 0x2d
-
-# CHECK: ddiv $zero, $26, $22
-0x03 0x56 0x00 0x1e
-
-# CHECK: ddivu $zero, $9, $24
-0x01 0x38 0x00 0x1f
-
-# CHECK: dmfc1 $2, $f14
-0x44 0x22 0x70 0x00
-
-# CHECK: dmtc1 $23, $f5
-0x44 0xb7 0x28 0x00
-
-# CHECK: dmult $11, $26
-0x01 0x7a 0x00 0x1c
-
-# CHECK: dmultu $23, $13
-0x02 0xed 0x00 0x1d
-
-# CHECK: dsll $3, $24, 17
-0x00 0x18 0x1c 0x78
-
-# CHECK: dsllv $gp, $27, $24
-0x03 0x1b 0xe0 0x14
-
-# CHECK: dsra $1, $1, 30
-0x00 0x01 0x0f 0xbb
-
-# CHECK: dsrav $1, $1, $fp
-0x03 0xc1 0x08 0x17
-
-# CHECK: dsrl $10, $gp, 24
-0x00 0x1c 0x56 0x3a
-
-# CHECK: dsrlv $gp, $10, $23
-0x02 0xea 0xe0 0x16
-
-# CHECK: dsubu $gp, $27, $24
-0x03 0x78 0xe0 0x2f
-
-# CHECK: lw $27, -15155($1)
-0x8c 0x3b 0xc4 0xcd
-
-# CHECK: lui $1, 1
-0x3c 0x01 0x00 0x01
-
-# CHECK: lwu $3, -1746($3)
-0x9c 0x63 0xf9 0x2e
-
-# CHECK: lui $ra, 1
-0x3c 0x1f 0x00 0x01
-
-# CHECK: sw $26, -15159($1)
-0xac 0x3a 0xc4 0xc9
-
-# CHECK: ld $26, 3958($zero)
-0xdc 0x1a 0x0f 0x76
-
-# CHECK: sd $6, 17767($zero)
-0xfc 0x06 0x45 0x67
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0x67 0x4b 0x7c 0xcd
+
+# CHECK: daddu $26, $1, $11
+0x00 0x2b 0xd0 0x2d
+
+# CHECK: ddiv $zero, $26, $22
+0x03 0x56 0x00 0x1e
+
+# CHECK: ddivu $zero, $9, $24
+0x01 0x38 0x00 0x1f
+
+# CHECK: dmfc1 $2, $f14
+0x44 0x22 0x70 0x00
+
+# CHECK: dmtc1 $23, $f5
+0x44 0xb7 0x28 0x00
+
+# CHECK: dmult $11, $26
+0x01 0x7a 0x00 0x1c
+
+# CHECK: dmultu $23, $13
+0x02 0xed 0x00 0x1d
+
+# CHECK: dsll $3, $24, 17
+0x00 0x18 0x1c 0x78
+
+# CHECK: dsllv $gp, $27, $24
+0x03 0x1b 0xe0 0x14
+
+# CHECK: dsra $1, $1, 30
+0x00 0x01 0x0f 0xbb
+
+# CHECK: dsrav $1, $1, $fp
+0x03 0xc1 0x08 0x17
+
+# CHECK: dsrl $10, $gp, 24
+0x00 0x1c 0x56 0x3a
+
+# CHECK: dsrlv $gp, $10, $23
+0x02 0xea 0xe0 0x16
+
+# CHECK: dsubu $gp, $27, $24
+0x03 0x78 0xe0 0x2f
+
+# CHECK: lw $27, -15155($1)
+0x8c 0x3b 0xc4 0xcd
+
+# CHECK: lui $1, 1
+0x3c 0x01 0x00 0x01
+
+# CHECK: lwu $3, -1746($3)
+0x9c 0x63 0xf9 0x2e
+
+# CHECK: lui $ra, 1
+0x3c 0x1f 0x00 0x01
+
+# CHECK: sw $26, -15159($1)
+0xac 0x3a 0xc4 0xc9
+
+# CHECK: ld $26, 3958($zero)
+0xdc 0x1a 0x0f 0x76
+
+# CHECK: sd $6, 17767($zero)
+0xfc 0x06 0x45 0x67
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
index fe8faffa83..a7ef0e473b 100644
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ b/test/MC/Disassembler/Mips/mips64_le.txt
@@ -1,67 +1,67 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0xcd 0x7c 0x4b 0x67
-
-# CHECK: daddu $26, $1, $11
-0x2d 0xd0 0x2b 0x00
-
-# CHECK: ddiv $zero, $26, $22
-0x1e 0x00 0x56 0x03
-
-# CHECK: ddivu $zero, $9, $24
-0x1f 0x00 0x38 0x01
-
-# CHECK: dmfc1 $2, $f14
-0x00 0x70 0x22 0x44
-
-# CHECK: dmtc1 $23, $f5
-0x00 0x28 0xb7 0x44
-
-# CHECK: dmult $11, $26
-0x1c 0x00 0x7a 0x01
-
-# CHECK: dmultu $23, $13
-0x1d 0x00 0xed 0x02
-
-# CHECK: dsll $3, $24, 17
-0x78 0x1c 0x18 0x00
-
-# CHECK: dsllv $gp, $27, $24
-0x14 0xe0 0x1b 0x03
-
-# CHECK: dsra $1, $1, 30
-0xbb 0x0f 0x01 0x00
-
-# CHECK: dsrav $1, $1, $fp
-0x17 0x08 0xc1 0x03
-
-# CHECK: dsrl $10, $gp, 24
-0x3a 0x56 0x1c 0x00
-
-# CHECK: dsrlv $gp, $10, $23
-0x16 0xe0 0xea 0x02
-
-# CHECK: dsubu $gp, $27, $24
-0x2f 0xe0 0x78 0x03
-
-# CHECK: lw $27, -15155($1)
-0xcd 0xc4 0x3b 0x8c
-
-# CHECK: lui $1, 1
-0x01 0x00 0x01 0x3c
-
-# CHECK: lwu $3, -1746($3)
-0x2e 0xf9 0x63 0x9c
-
-# CHECK: lui $ra, 1
-0x01 0x00 0x1f 0x3c
-
-# CHECK: sw $26, -15159($1)
-0xc9 0xc4 0x3a 0xac
-
-# CHECK: ld $26, 3958($zero)
-0x76 0x0f 0x1a 0xdc
-
-# CHECK: sd $6, 17767($zero)
-0x67 0x45 0x06 0xfc
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0xcd 0x7c 0x4b 0x67
+
+# CHECK: daddu $26, $1, $11
+0x2d 0xd0 0x2b 0x00
+
+# CHECK: ddiv $zero, $26, $22
+0x1e 0x00 0x56 0x03
+
+# CHECK: ddivu $zero, $9, $24
+0x1f 0x00 0x38 0x01
+
+# CHECK: dmfc1 $2, $f14
+0x00 0x70 0x22 0x44
+
+# CHECK: dmtc1 $23, $f5
+0x00 0x28 0xb7 0x44
+
+# CHECK: dmult $11, $26
+0x1c 0x00 0x7a 0x01
+
+# CHECK: dmultu $23, $13
+0x1d 0x00 0xed 0x02
+
+# CHECK: dsll $3, $24, 17
+0x78 0x1c 0x18 0x00
+
+# CHECK: dsllv $gp, $27, $24
+0x14 0xe0 0x1b 0x03
+
+# CHECK: dsra $1, $1, 30
+0xbb 0x0f 0x01 0x00
+
+# CHECK: dsrav $1, $1, $fp
+0x17 0x08 0xc1 0x03
+
+# CHECK: dsrl $10, $gp, 24
+0x3a 0x56 0x1c 0x00
+
+# CHECK: dsrlv $gp, $10, $23
+0x16 0xe0 0xea 0x02
+
+# CHECK: dsubu $gp, $27, $24
+0x2f 0xe0 0x78 0x03
+
+# CHECK: lw $27, -15155($1)
+0xcd 0xc4 0x3b 0x8c
+
+# CHECK: lui $1, 1
+0x01 0x00 0x01 0x3c
+
+# CHECK: lwu $3, -1746($3)
+0x2e 0xf9 0x63 0x9c
+
+# CHECK: lui $ra, 1
+0x01 0x00 0x1f 0x3c
+
+# CHECK: sw $26, -15159($1)
+0xc9 0xc4 0x3a 0xac
+
+# CHECK: ld $26, 3958($zero)
+0x76 0x0f 0x1a 0xdc
+
+# CHECK: sd $6, 17767($zero)
+0x67 0x45 0x06 0xfc
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt
index 2dfde0d231..0b421fc551 100644
--- a/test/MC/Disassembler/Mips/mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2.txt
@@ -1,91 +1,91 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0x67 0x4b 0x7c 0xcd
-
-# CHECK: daddu $26, $1, $11
-0x00 0x2b 0xd0 0x2d
-
-# CHECK: ddiv $zero, $26, $22
-0x03 0x56 0x00 0x1e
-
-# CHECK: ddivu $zero, $9, $24
-0x01 0x38 0x00 0x1f
-
-# CHECK: dmfc1 $2, $f14
-0x44 0x22 0x70 0x00
-
-# CHECK: dmtc1 $23, $f5
-0x44 0xb7 0x28 0x00
-
-# CHECK: dmult $11, $26
-0x01 0x7a 0x00 0x1c
-
-# CHECK: dmultu $23, $13
-0x02 0xed 0x00 0x1d
-
-# CHECK: dsll $3, $24, 17
-0x00 0x18 0x1c 0x78
-
-# CHECK: dsllv $gp, $27, $24
-0x03 0x1b 0xe0 0x14
-
-# CHECK: dsra $1, $1, 30
-0x00 0x01 0x0f 0xbb
-
-# CHECK: dsrav $1, $1, $fp
-0x03 0xc1 0x08 0x17
-
-# CHECK: dsrl $10, $gp, 24
-0x00 0x1c 0x56 0x3a
-
-# CHECK: dsrlv $gp, $10, $23
-0x02 0xea 0xe0 0x16
-
-# CHECK: dsubu $gp, $27, $24
-0x03 0x78 0xe0 0x2f
-
-# CHECK: lw $27, -15155($1)
-0x8c 0x3b 0xc4 0xcd
-
-# CHECK: lui $1, 1
-0x3c 0x01 0x00 0x01
-
-# CHECK: lwu $3, -1746($3)
-0x9c 0x63 0xf9 0x2e
-
-# CHECK: lui $ra, 1
-0x3c 0x1f 0x00 0x01
-
-# CHECK: sw $26, -15159($1)
-0xac 0x3a 0xc4 0xc9
-
-# CHECK: ld $26, 3958($zero)
-0xdc 0x1a 0x0f 0x76
-
-# CHECK: sd $6, 17767($zero)
-0xfc 0x06 0x45 0x67
-
-# CHECK: dclo $9, $24
-0x73 0x09 0x48 0x25
-
-# CHECK: dclz $26, $9
-0x71 0x3a 0xd0 0x24
-
-# CHECK: dext $7, $gp, 29, 31
-0x7f 0x87 0xf7 0x43
-
-# CHECK: dins $20, $gp, 15, 1
-0x7f 0x94 0x7b 0xc7
-
-# CHECK: dsbh $7, $gp
-0x7c 0x1c 0x38 0xa4
-
-# CHECK: dshd $3, $14
-0x7c 0x0e 0x19 0x64
-
-# CHECK: drotr $20, $27, 6
-0x00 0x3b 0xa1 0xba
-
-# CHECK: drotrv $24, $23, $5
-0x00 0xb7 0xc0 0x56
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0x67 0x4b 0x7c 0xcd
+
+# CHECK: daddu $26, $1, $11
+0x00 0x2b 0xd0 0x2d
+
+# CHECK: ddiv $zero, $26, $22
+0x03 0x56 0x00 0x1e
+
+# CHECK: ddivu $zero, $9, $24
+0x01 0x38 0x00 0x1f
+
+# CHECK: dmfc1 $2, $f14
+0x44 0x22 0x70 0x00
+
+# CHECK: dmtc1 $23, $f5
+0x44 0xb7 0x28 0x00
+
+# CHECK: dmult $11, $26
+0x01 0x7a 0x00 0x1c
+
+# CHECK: dmultu $23, $13
+0x02 0xed 0x00 0x1d
+
+# CHECK: dsll $3, $24, 17
+0x00 0x18 0x1c 0x78
+
+# CHECK: dsllv $gp, $27, $24
+0x03 0x1b 0xe0 0x14
+
+# CHECK: dsra $1, $1, 30
+0x00 0x01 0x0f 0xbb
+
+# CHECK: dsrav $1, $1, $fp
+0x03 0xc1 0x08 0x17
+
+# CHECK: dsrl $10, $gp, 24
+0x00 0x1c 0x56 0x3a
+
+# CHECK: dsrlv $gp, $10, $23
+0x02 0xea 0xe0 0x16
+
+# CHECK: dsubu $gp, $27, $24
+0x03 0x78 0xe0 0x2f
+
+# CHECK: lw $27, -15155($1)
+0x8c 0x3b 0xc4 0xcd
+
+# CHECK: lui $1, 1
+0x3c 0x01 0x00 0x01
+
+# CHECK: lwu $3, -1746($3)
+0x9c 0x63 0xf9 0x2e
+
+# CHECK: lui $ra, 1
+0x3c 0x1f 0x00 0x01
+
+# CHECK: sw $26, -15159($1)
+0xac 0x3a 0xc4 0xc9
+
+# CHECK: ld $26, 3958($zero)
+0xdc 0x1a 0x0f 0x76
+
+# CHECK: sd $6, 17767($zero)
+0xfc 0x06 0x45 0x67
+
+# CHECK: dclo $9, $24
+0x73 0x09 0x48 0x25
+
+# CHECK: dclz $26, $9
+0x71 0x3a 0xd0 0x24
+
+# CHECK: dext $7, $gp, 29, 31
+0x7f 0x87 0xf7 0x43
+
+# CHECK: dins $20, $gp, 15, 1
+0x7f 0x94 0x7b 0xc7
+
+# CHECK: dsbh $7, $gp
+0x7c 0x1c 0x38 0xa4
+
+# CHECK: dshd $3, $14
+0x7c 0x0e 0x19 0x64
+
+# CHECK: drotr $20, $27, 6
+0x00 0x3b 0xa1 0xba
+
+# CHECK: drotrv $24, $23, $5
+0x00 0xb7 0xc0 0x56
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt
index 620d9ebe8d..c1d326f6d6 100644
--- a/test/MC/Disassembler/Mips/mips64r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips64r2_le.txt
@@ -1,91 +1,91 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section __TEXT,__text,regular,pure_instructions
-# CHECK: daddiu $11, $26, 31949
-0xcd 0x7c 0x4b 0x67
-
-# CHECK: daddu $26, $1, $11
-0x2d 0xd0 0x2b 0x00
-
-# CHECK: ddiv $zero, $26, $22
-0x1e 0x00 0x56 0x03
-
-# CHECK: ddivu $zero, $9, $24
-0x1f 0x00 0x38 0x01
-
-# CHECK: dmfc1 $2, $f14
-0x00 0x70 0x22 0x44
-
-# CHECK: dmtc1 $23, $f5
-0x00 0x28 0xb7 0x44
-
-# CHECK: dmult $11, $26
-0x1c 0x00 0x7a 0x01
-
-# CHECK: dmultu $23, $13
-0x1d 0x00 0xed 0x02
-
-# CHECK: dsll $3, $24, 17
-0x78 0x1c 0x18 0x00
-
-# CHECK: dsllv $gp, $27, $24
-0x14 0xe0 0x1b 0x03
-
-# CHECK: dsra $1, $1, 30
-0xbb 0x0f 0x01 0x00
-
-# CHECK: dsrav $1, $1, $fp
-0x17 0x08 0xc1 0x03
-
-# CHECK: dsrl $10, $gp, 24
-0x3a 0x56 0x1c 0x00
-
-# CHECK: dsrlv $gp, $10, $23
-0x16 0xe0 0xea 0x02
-
-# CHECK: dsubu $gp, $27, $24
-0x2f 0xe0 0x78 0x03
-
-# CHECK: lw $27, -15155($1)
-0xcd 0xc4 0x3b 0x8c
-
-# CHECK: lui $1, 1
-0x01 0x00 0x01 0x3c
-
-# CHECK: lwu $3, -1746($3)
-0x2e 0xf9 0x63 0x9c
-
-# CHECK: lui $ra, 1
-0x01 0x00 0x1f 0x3c
-
-# CHECK: sw $26, -15159($1)
-0xc9 0xc4 0x3a 0xac
-
-# CHECK: ld $26, 3958($zero)
-0x76 0x0f 0x1a 0xdc
-
-# CHECK: sd $6, 17767($zero)
-0x67 0x45 0x06 0xfc
-
-# CHECK: dclo $9, $24
-0x25 0x48 0x09 0x73
-
-# CHECK: dclz $26, $9
-0x24 0xd0 0x3a 0x71
-
-# CHECK: dext $7, $gp, 29, 31
-0x43 0xf7 0x87 0x7f
-
-# CHECK: dins $20, $gp, 15, 1
-0xc7 0x7b 0x94 0x7f
-
-# CHECK: dsbh $7, $gp
-0xa4 0x38 0x1c 0x7c
-
-# CHECK: dshd $3, $14
-0x64 0x19 0x0e 0x7c
-
-# CHECK: drotr $20, $27, 6
-0xba 0xa1 0x3b 0x00
-
-# CHECK: drotrv $24, $23, $5
-0x56 0xc0 0xb7 0x00
+# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+# CHECK: daddiu $11, $26, 31949
+0xcd 0x7c 0x4b 0x67
+
+# CHECK: daddu $26, $1, $11
+0x2d 0xd0 0x2b 0x00
+
+# CHECK: ddiv $zero, $26, $22
+0x1e 0x00 0x56 0x03
+
+# CHECK: ddivu $zero, $9, $24
+0x1f 0x00 0x38 0x01
+
+# CHECK: dmfc1 $2, $f14
+0x00 0x70 0x22 0x44
+
+# CHECK: dmtc1 $23, $f5
+0x00 0x28 0xb7 0x44
+
+# CHECK: dmult $11, $26
+0x1c 0x00 0x7a 0x01
+
+# CHECK: dmultu $23, $13
+0x1d 0x00 0xed 0x02
+
+# CHECK: dsll $3, $24, 17
+0x78 0x1c 0x18 0x00
+
+# CHECK: dsllv $gp, $27, $24
+0x14 0xe0 0x1b 0x03
+
+# CHECK: dsra $1, $1, 30
+0xbb 0x0f 0x01 0x00
+
+# CHECK: dsrav $1, $1, $fp
+0x17 0x08 0xc1 0x03
+
+# CHECK: dsrl $10, $gp, 24
+0x3a 0x56 0x1c 0x00
+
+# CHECK: dsrlv $gp, $10, $23
+0x16 0xe0 0xea 0x02
+
+# CHECK: dsubu $gp, $27, $24
+0x2f 0xe0 0x78 0x03
+
+# CHECK: lw $27, -15155($1)
+0xcd 0xc4 0x3b 0x8c
+
+# CHECK: lui $1, 1
+0x01 0x00 0x01 0x3c
+
+# CHECK: lwu $3, -1746($3)
+0x2e 0xf9 0x63 0x9c
+
+# CHECK: lui $ra, 1
+0x01 0x00 0x1f 0x3c
+
+# CHECK: sw $26, -15159($1)
+0xc9 0xc4 0x3a 0xac
+
+# CHECK: ld $26, 3958($zero)
+0x76 0x0f 0x1a 0xdc
+
+# CHECK: sd $6, 17767($zero)
+0x67 0x45 0x06 0xfc
+
+# CHECK: dclo $9, $24
+0x25 0x48 0x09 0x73
+
+# CHECK: dclz $26, $9
+0x24 0xd0 0x3a 0x71
+
+# CHECK: dext $7, $gp, 29, 31
+0x43 0xf7 0x87 0x7f
+
+# CHECK: dins $20, $gp, 15, 1
+0xc7 0x7b 0x94 0x7f
+
+# CHECK: dsbh $7, $gp
+0xa4 0x38 0x1c 0x7c
+
+# CHECK: dshd $3, $14
+0x64 0x19 0x0e 0x7c
+
+# CHECK: drotr $20, $27, 6
+0xba 0xa1 0x3b 0x00
+
+# CHECK: drotrv $24, $23, $5
+0x56 0xc0 0xb7 0x00
diff --git a/test/MC/Disassembler/X86/enhanced.txt b/test/MC/Disassembler/X86/enhanced.txt
index deff735b69..97b0fa4ab5 100644
--- a/test/MC/Disassembler/X86/enhanced.txt
+++ b/test/MC/Disassembler/X86/enhanced.txt
@@ -1,10 +1,10 @@
# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
-# CHECK: [o:jne][w: ][0-p:-][0-l:10=10] <br> 0:[RIP/112](pc)=18446744073709551606
+# CHECK: [o:jne][w: ][0-p:-][0-l:10=10] <br> 0:[RIP/{{[0-9]+}}](pc)=18446744073709551606
0x0f 0x85 0xf6 0xff 0xff 0xff
-# CHECK: [o:movq][w: ][1-r:%gs=r64][1-p::][1-l:8=8][p:,][w: ][0-r:%rcx=r109] <mov> 0:[RCX/109]=0 1:[GS/64]=8
+# CHECK: [o:movq][w: ][1-r:%gs=r{{[0-9]+}}][1-p::][1-l:8=8][p:,][w: ][0-r:%rcx=r{{[0-9]+}}] <mov> 0:[RCX/{{[0-9]+}}]=0 1:[GS/{{[0-9]+}}]=8
0x65 0x48 0x8b 0x0c 0x25 0x08 0x00 0x00 0x00
-# CHECK: [o:xorps][w: ][2-r:%xmm1=r130][p:,][w: ][0-r:%xmm2=r131] 0:[XMM2/131]=0 1:[XMM2/131]=0 2:[XMM1/130]=0
+# CHECK: [o:xorps][w: ][2-r:%xmm1=r{{[0-9]+}}][p:,][w: ][0-r:%xmm2=r{{[0-9]+}}] 0:[XMM2/{{[0-9]+}}]=0 1:[XMM2/{{[0-9]+}}]=0 2:[XMM1/{{[0-9]+}}]=0
0x0f 0x57 0xd1
-# CHECK: [o:andps][w: ][2-r:%xmm1=r130][p:,][w: ][0-r:%xmm2=r131] 0:[XMM2/131]=0 1:[XMM2/131]=0 2:[XMM1/130]=0
+# CHECK: [o:andps][w: ][2-r:%xmm1=r{{[0-9]+}}][p:,][w: ][0-r:%xmm2=r{{[0-9]+}}] 0:[XMM2/{{[0-9]+}}]=0 1:[XMM2/{{[0-9]+}}]=0 2:[XMM1/{{[0-9]+}}]=0
0x0f 0x54 0xd1
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
new file mode 100644
index 0000000000..3772309703
--- /dev/null
+++ b/test/MC/ELF/cfi-register.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_register %rbp, %rax
+ nop
+ .cfi_endproc
+
+// CHECK: # Section 4
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT: ('sh_size', 0x0000000000000030)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410906 00000000')
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 5
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000004)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT: ('sh_size', 0x0000000000000018)
+// CHECK-NEXT: ('sh_link', 0x00000007)
+// CHECK-NEXT: ('sh_info', 0x00000004)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-NEXT: ('r_sym', 0x00000002)
+// CHECK-NEXT: ('r_type', 0x00000002)
+// CHECK-NEXT: ('r_addend', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
new file mode 100644
index 0000000000..28049faec2
--- /dev/null
+++ b/test/MC/ELF/cfi-undefined.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_undefined %rbp
+ nop
+ .cfi_endproc
+// CHECK: # Section 4
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT: ('sh_size', 0x0000000000000030)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410706 00000000')
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 5
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000004)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT: ('sh_size', 0x0000000000000018)
+// CHECK-NEXT: ('sh_link', 0x00000007)
+// CHECK-NEXT: ('sh_info', 0x00000004)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-NEXT: ('r_sym', 0x00000002)
+// CHECK-NEXT: ('r_type', 0x00000002)
+// CHECK-NEXT: ('r_addend', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
index b090e0802b..85e02428fe 100644
--- a/test/MC/ELF/gen-dwarf.s
+++ b/test/MC/ELF/gen-dwarf.s
@@ -1,8 +1,9 @@
// RUN: llvm-mc -g -triple i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
-// Test that on ELF the debug info has a relocation to debug_abbrev and one to
-// to debug_line.
+// Test that on ELF:
+// 1. the debug info has a relocation to debug_abbrev and one to debug_line.
+// 2. the debug_aranges has relocations to text and debug_info.
.text
@@ -47,6 +48,34 @@ foo:
// CHECK: # Section 8
// CHECK-NEXT: (('sh_name', 0x00000001) # '.debug_abbrev'
+// Section 9 is .debug_aranges
+// CHECK: # Section 9
+// CHECK-NEXT: (('sh_name', 0x0000001e) # '.debug_aranges'
+
+// Two relocations in .debug_aranges, one to text and one to debug_info.
+// CHECK: # '.rel.debug_aranges'
+// CHECK: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x00000006)
+// CHECK-NEXT: ('r_sym', 0x000005)
+// CHECK-NEXT: ('r_type', 0x01)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 1
+// CHECK-NEXT: (('r_offset', 0x00000010)
+// CHECK-NEXT: ('r_sym', 0x000001)
+// CHECK-NEXT: ('r_type', 0x01)
+// CHECK-NEXT: ),
+
+// Symbol 1 is section 1 (.text)
+// CHECK: # Symbol 1
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x0)
+// CHECK-NEXT: ('st_type', 0x3)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0001)
+// CHECK-NEXT: ),
+
// Symbol 4 is section 4 (.debug_line)
// CHECK: # Symbol 4
// CHECK-NEXT: (('st_name', 0x00000000) # ''
@@ -58,6 +87,17 @@ foo:
// CHECK-NEXT: ('st_shndx', 0x0004)
// CHECK-NEXT: ),
+// Symbol 5 is section 6 (.debug_info)
+// CHECK: # Symbol 5
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x0)
+// CHECK-NEXT: ('st_type', 0x3)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0006)
+// CHECK-NEXT: ),
+
// Symbol 6 is section 8 (.debug_abbrev)
// CHECK: # Symbol 6
// CHECK-NEXT: (('st_name', 0x00000000) # ''
diff --git a/test/MC/Mips/xgot.ll b/test/MC/Mips/xgot.ll
new file mode 100644
index 0000000000..bfe9b9ad66
--- /dev/null
+++ b/test/MC/Mips/xgot.ll
@@ -0,0 +1,42 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mxgot %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+@.str = private unnamed_addr constant [16 x i8] c"ext_1=%d, i=%d\0A\00", align 1
+@ext_1 = external global i32
+
+define void @fill() nounwind {
+entry:
+
+; Check that the appropriate relocations were created.
+; For the xgot case we want to see R_MIPS_[GOT|CALL]_[HI|LO]16.
+
+; R_MIPS_HI16
+; CHECK: ('r_type', 0x05)
+
+; R_MIPS_LO16
+; CHECK: ('r_type', 0x06)
+
+; R_MIPS_GOT_HI16
+; CHECK: ('r_type', 0x16)
+
+; R_MIPS_GOT_LO16
+; CHECK: ('r_type', 0x17)
+
+; R_MIPS_GOT
+; CHECK: ('r_type', 0x09)
+
+; R_MIPS_LO16
+; CHECK: ('r_type', 0x06)
+
+; R_MIPS_CALL_HI16
+; CHECK: ('r_type', 0x1e)
+
+; R_MIPS_CALL_LO16
+; CHECK: ('r_type', 0x1f)
+
+ %0 = load i32* @ext_1, align 4
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str, i32 0, i32 0), i32 %0) nounwind
+ ret void
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll
index 5996af84f4..4919e91400 100644
--- a/test/MC/PowerPC/ppc64-relocs-01.ll
+++ b/test/MC/PowerPC/ppc64-relocs-01.ll
@@ -1,4 +1,4 @@
-;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 \
+;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 -code-model=small \
;; RUN: -filetype=obj %s -o - | \
;; RUN: elf-dump --dump-section-data | FileCheck %s
diff --git a/test/MC/PowerPC/ppc64-tls-relocs-01.ll b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
new file mode 100644
index 0000000000..5e37311075
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
@@ -0,0 +1,28 @@
+;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \
+;; RUN: elf-dump --dump-section-data | FileCheck %s
+
+;; FIXME: this file should be in .s form, change when asm parser is available.
+
+@t = thread_local global i32 0, align 4
+
+define i32* @f() nounwind {
+entry:
+ ret i32* @t
+}
+
+;; Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
+;; against the thread-local symbol 't'.
+;; CHECK: '.rela.text'
+;; CHECK: Relocation 0
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x00000008
+;; CHECK-NEXT: 'r_type', 0x00000048
+;; CHECK: Relocation 1
+;; CHECK-NEXT: 'r_offset',
+;; CHECK-NEXT: 'r_sym', 0x00000008
+;; CHECK-NEXT: 'r_type', 0x00000046
+
+;; Check that we got the correct symbol.
+;; CHECK: Symbol 8
+;; CHECK-NEXT: 't'
+
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index f161e06cb5..6e14d62fda 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -18,7 +18,7 @@ addl $0, 0(%rax)
movl 0(%rax), 0(%edx) // error: invalid operand for instruction
-// 32: error: instruction requires a CPU feature not currently enabled
+// 32: error: instruction requires: 64-bit mode
sysexitq
// rdar://10710167
diff --git a/test/Object/Inputs/coff_archive.lib b/test/Object/Inputs/coff_archive.lib
new file mode 100755
index 0000000000..e079991bfb
--- /dev/null
+++ b/test/Object/Inputs/coff_archive.lib
Binary files differ
diff --git a/test/Object/Inputs/liblong_filenames.a b/test/Object/Inputs/liblong_filenames.a
new file mode 100644
index 0000000000..368d665c94
--- /dev/null
+++ b/test/Object/Inputs/liblong_filenames.a
Binary files differ
diff --git a/test/Object/Inputs/libsimple_archive.a b/test/Object/Inputs/libsimple_archive.a
new file mode 100644
index 0000000000..6e232e3e3c
--- /dev/null
+++ b/test/Object/Inputs/libsimple_archive.a
Binary files differ
diff --git a/test/Object/archive-long-index.test b/test/Object/archive-long-index.test
new file mode 100644
index 0000000000..d0fb19cd8d
--- /dev/null
+++ b/test/Object/archive-long-index.test
@@ -0,0 +1,40 @@
+#
+# Check if the index is appearing properly in the output file
+#
+RUN: llvm-nm -s %p/Inputs/liblong_filenames.a | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: abcdefghijklmnopqrstuvwxyz12345678 in 1.o
+CHECKIDX: main in 1.o
+CHECKIDX: fn1 in 2.o
+CHECKIDX: fn3 in 3.o
+CHECKIDX: fn1 in 3.o
+CHECKIDX: shankar in 4.o
+CHECKIDX: a in 5.o
+CHECKIDX: b in 6.o
+CHECKIDX: a in abcdefghijklmnopqrstuvwxyz1.o
+CHECKIDX: b in abcdefghijklmnopqrstuvwxyz2.o
+CHECKIDX: bda in abcdefghijklmnopqrstuvwxyz2.o
+CHECKIDX: b in abcdefghijklmnopq.o
+CHECKIDX: 1.o:
+CHECKIDX: 00000000 D abcdefghijklmnopqrstuvwxyz12345678
+CHECKIDX: U bda
+CHECKIDX: 00000000 T main
+CHECKIDX: 2.o:
+CHECKIDX: 00000000 T fn1
+CHECKIDX: 3.o:
+CHECKIDX: 0000000b T fn1
+CHECKIDX: 00000000 T fn3
+CHECKIDX: 4.o:
+CHECKIDX: C shankar
+CHECKIDX: 5.o:
+CHECKIDX: C a
+CHECKIDX: 6.o:
+CHECKIDX: C b
+CHECKIDX: abcdefghijklmnopqrstuvwxyz1.o:
+CHECKIDX: C a
+CHECKIDX: abcdefghijklmnopqrstuvwxyz2.o:
+CHECKIDX: C b
+CHECKIDX: 00000000 T bda
+CHECKIDX: abcdefghijklmnopq.o:
+CHECKIDX: C b
diff --git a/test/Object/coff-archive.test b/test/Object/coff-archive.test
new file mode 100644
index 0000000000..768fe1c4b1
--- /dev/null
+++ b/test/Object/coff-archive.test
@@ -0,0 +1,225 @@
+#
+# Check if the index is appearing properly in the output file
+#
+RUN: llvm-nm --numeric-sort -s %p/Inputs/coff_archive.lib | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: ??0invalid_argument@std@@QAE@PBD@Z in Debug\mymath.obj
+CHECKIDX: ??0logic_error@std@@QAE@PBD@Z in Debug\mymath.obj
+CHECKIDX: ??1invalid_argument@std@@UAE@XZ in Debug\mymath.obj
+CHECKIDX: ??1logic_error@std@@UAE@XZ in Debug\mymath.obj
+CHECKIDX: ??_7invalid_argument@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ??_7logic_error@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ??_C@_0BC@IHENMCGI@b?5cannot?5be?5zero?$CB?$AA@ in Debug\mymath.obj
+CHECKIDX: ??_Ginvalid_argument@std@@UAEPAXI@Z in Debug\mymath.obj
+CHECKIDX: ??_Glogic_error@std@@UAEPAXI@Z in Debug\mymath.obj
+CHECKIDX: ??_R0?AVexception@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0?AVinvalid_argument@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0?AVlogic_error@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAVexception@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAVinvalid_argument@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAVlogic_error@std@@@8 in Debug\mymath.obj
+CHECKIDX: ??_R0PAX@8 in Debug\mymath.obj
+CHECKIDX: ??_R1A@?0A@EA@exception@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R1A@?0A@EA@invalid_argument@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R1A@?0A@EA@logic_error@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R2exception@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R2invalid_argument@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R2logic_error@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R3exception@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R3invalid_argument@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R3logic_error@std@@8 in Debug\mymath.obj
+CHECKIDX: ??_R4invalid_argument@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ??_R4logic_error@std@@6B@ in Debug\mymath.obj
+CHECKIDX: ?Add@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?Divide@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?Multiply@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?Subtract@MyMathFuncs@MathFuncs@@SANNN@Z in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@C@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@D@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@E@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@F@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@G@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@H@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@I@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@J@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@K@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@M@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@N@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@O@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@_J@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@_K@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?_Rank@?$_Arithmetic_traits@_N@std@@2HB in Debug\mymath.obj
+CHECKIDX: ?value@?$integral_constant@I$0A@@tr1@std@@2IB in Debug\mymath.obj
+CHECKIDX: ?value@?$integral_constant@_N$00@tr1@std@@2_NB in Debug\mymath.obj
+CHECKIDX: ?value@?$integral_constant@_N$0A@@tr1@std@@2_NB in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAVexception@std@@@84 in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAVinvalid_argument@std@@@84 in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAVlogic_error@std@@@84 in Debug\mymath.obj
+CHECKIDX: __CT??_R0PAX@84 in Debug\mymath.obj
+CHECKIDX: __CTA4PAVinvalid_argument@std@@ in Debug\mymath.obj
+CHECKIDX: __TI4PAVinvalid_argument@std@@ in Debug\mymath.obj
+CHECKIDX: __real@0000000000000000 in Debug\mymath.obj
+CHECKIDX: Debug\stdafx.obj:
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$T
+CHECKIDX: 00000000 i .drectve
+CHECKIDX: 00000001 a @feat.00
+CHECKIDX: 00ab9d1b a @comp.id
+CHECKIDX: Debug\mymath.obj:
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 d .data
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$S
+CHECKIDX: 00000000 N .debug$T
+CHECKIDX: 00000000 i .drectve
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rdata$r
+CHECKIDX: 00000000 r .rtc$IMZ
+CHECKIDX: 00000000 r .rtc$TMZ
+CHECKIDX: 00000000 N .sxdata
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text
+CHECKIDX: 00000000 t .text$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 r .xdata$x
+CHECKIDX: 00000000 T ??0invalid_argument@std@@QAE@PBD@Z
+CHECKIDX: 00000000 T ??0logic_error@std@@QAE@PBD@Z
+CHECKIDX: 00000000 T ??1invalid_argument@std@@UAE@XZ
+CHECKIDX: 00000000 T ??1logic_error@std@@UAE@XZ
+CHECKIDX: 00000000 R ??_C@_0BC@IHENMCGI@b?5cannot?5be?5zero?$CB?$AA@
+CHECKIDX: 00000000 T ??_Ginvalid_argument@std@@UAEPAXI@Z
+CHECKIDX: 00000000 T ??_Glogic_error@std@@UAEPAXI@Z
+CHECKIDX: 00000000 D ??_R0?AVexception@std@@@8
+CHECKIDX: 00000000 D ??_R0?AVinvalid_argument@std@@@8
+CHECKIDX: 00000000 D ??_R0?AVlogic_error@std@@@8
+CHECKIDX: 00000000 D ??_R0PAVexception@std@@@8
+CHECKIDX: 00000000 D ??_R0PAVinvalid_argument@std@@@8
+CHECKIDX: 00000000 D ??_R0PAVlogic_error@std@@@8
+CHECKIDX: 00000000 D ??_R0PAX@8
+CHECKIDX: 00000000 R ??_R1A@?0A@EA@exception@std@@8
+CHECKIDX: 00000000 R ??_R1A@?0A@EA@invalid_argument@std@@8
+CHECKIDX: 00000000 R ??_R1A@?0A@EA@logic_error@std@@8
+CHECKIDX: 00000000 R ??_R2exception@std@@8
+CHECKIDX: 00000000 R ??_R2invalid_argument@std@@8
+CHECKIDX: 00000000 R ??_R2logic_error@std@@8
+CHECKIDX: 00000000 R ??_R3exception@std@@8
+CHECKIDX: 00000000 R ??_R3invalid_argument@std@@8
+CHECKIDX: 00000000 R ??_R3logic_error@std@@8
+CHECKIDX: 00000000 R ??_R4invalid_argument@std@@6B@
+CHECKIDX: 00000000 R ??_R4logic_error@std@@6B@
+CHECKIDX: 00000000 T ?Add@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 T ?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 T ?Multiply@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 T ?Subtract@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@C@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@D@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@E@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@F@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@G@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@H@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@I@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@J@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@K@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@M@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@N@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@O@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@_J@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@_K@std@@2HB
+CHECKIDX: 00000000 R ?_Rank@?$_Arithmetic_traits@_N@std@@2HB
+CHECKIDX: 00000000 R ?value@?$integral_constant@I$0A@@tr1@std@@2IB
+CHECKIDX: 00000000 R ?value@?$integral_constant@_N$00@tr1@std@@2_NB
+CHECKIDX: 00000000 R ?value@?$integral_constant@_N$0A@@tr1@std@@2_NB
+CHECKIDX: 00000000 R __CT??_R0PAVexception@std@@@84
+CHECKIDX: 00000000 R __CT??_R0PAVinvalid_argument@std@@@84
+CHECKIDX: 00000000 R __CT??_R0PAVlogic_error@std@@@84
+CHECKIDX: 00000000 R __CT??_R0PAX@84
+CHECKIDX: 00000000 R __CTA4PAVinvalid_argument@std@@
+CHECKIDX: 00000000 r __RTC_InitBase.rtc$IMZ
+CHECKIDX: 00000000 r __RTC_Shutdown.rtc$TMZ
+CHECKIDX: 00000000 R __TI4PAVinvalid_argument@std@@
+CHECKIDX: 00000000 R __real@0000000000000000
+CHECKIDX: 00000000 t __unwindfunclet$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z$0
+CHECKIDX: 00000000 r __unwindtable$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00000001 a @feat.00
+CHECKIDX: 00000004 R ??_7invalid_argument@std@@6B@
+CHECKIDX: 00000004 R ??_7logic_error@std@@6B@
+CHECKIDX: 00000008 r __ehfuncinfo$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 0000000e t __ehhandler$?Divide@MyMathFuncs@MathFuncs@@SANNN@Z
+CHECKIDX: 00ab9d1b a @comp.id
+CHECKIDX: U ??2@YAPAXI@Z
+CHECKIDX: U ??3@YAXPAX@Z
+CHECKIDX: U ??_7type_info@@6B@
+CHECKIDX: w ??_Einvalid_argument@std@@UAEPAXI@Z
+CHECKIDX: w ??_Elogic_error@std@@UAEPAXI@Z
+CHECKIDX: U ??_Ginvalid_argument@std@@UAEPAXI@Z
+CHECKIDX: U ??_Glogic_error@std@@UAEPAXI@Z
+CHECKIDX: U ?what@exception@std@@UBEPBDXZ
+CHECKIDX: U @__security_check_cookie@4
+CHECKIDX: U __CxxThrowException@8
+CHECKIDX: U __RTC_CheckEsp
+CHECKIDX: U __RTC_InitBase
+CHECKIDX: U __RTC_Shutdown
+CHECKIDX: U ___CxxFrameHandler3
+CHECKIDX: U ___security_cookie
+CHECKIDX: U __fltused
+CHECKIDX: U __imp_??0exception@std@@QAE@ABQBD@Z
+CHECKIDX: U __imp_??1exception@std@@UAE@XZ
diff --git a/test/Object/simple-archive.test b/test/Object/simple-archive.test
new file mode 100644
index 0000000000..c313f3facd
--- /dev/null
+++ b/test/Object/simple-archive.test
@@ -0,0 +1,12 @@
+#
+# Check if the index is appearing properly in the output file
+#
+RUN: llvm-nm -s %p/Inputs/libsimple_archive.a | FileCheck -check-prefix=CHECKIDX %s
+
+CHECKIDX: Archive map
+CHECKIDX: abcdefghijklmnopqrstuvwxyz12345678 in 1.o
+CHECKIDX: main in 1.o
+CHECKIDX: 1.o:
+CHECKIDX: 00000000 D abcdefghijklmnopqrstuvwxyz12345678
+CHECKIDX: U fn1
+CHECKIDX: 00000000 T main
diff --git a/test/Other/2008-10-15-MissingSpace.ll b/test/Other/2008-10-15-MissingSpace.ll
index cac696ed6f..bc78e84a0a 100644
--- a/test/Other/2008-10-15-MissingSpace.ll
+++ b/test/Other/2008-10-15-MissingSpace.ll
@@ -1,8 +1,12 @@
-; RUN: llvm-as < %s | llvm-dis | not grep "void@"
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; PR2894
declare void @g()
define void @f() {
- invoke void @g() to label %c unwind label %c
+; CHECK: invoke void @g()
+; CHECK: to label %d unwind label %c
+ invoke void @g() to label %d unwind label %c
+d:
+ ret void
c:
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump
index 69cdacde45..61342d8f98 100755
--- a/test/Scripts/elf-dump
+++ b/test/Scripts/elf-dump
@@ -52,6 +52,31 @@ class StringTable:
end = self.string_table.index('\x00', index)
return self.string_table[index:end]
+class ProgramHeader:
+ def __init__(self, f):
+ self.p_type = f.read32()
+ if f.is64Bit:
+ self.p_flags = f.read32()
+ self.p_offset = f.readWord()
+ self.p_vaddr = f.readWord()
+ self.p_paddr = f.readWord()
+ self.p_filesz = f.readWord()
+ self.p_memsz = f.readWord()
+ if not f.is64Bit:
+ self.p_flags = f.read32()
+ self.p_align = f.readWord()
+
+ def dump(self):
+ print " (('p_type', %s)" % common_dump.HexDump(self.p_type)
+ print " ('p_flags', %s)" % common_dump.HexDump(self.p_flags)
+ print " ('p_offset', %s)" % common_dump.HexDump(self.p_offset)
+ print " ('p_vaddr', %s)" % common_dump.HexDump(self.p_vaddr)
+ print " ('p_paddr', %s)" % common_dump.HexDump(self.p_paddr)
+ print " ('p_filesz', %s)" % common_dump.HexDump(self.p_filesz)
+ print " ('p_memsz', %s)" % common_dump.HexDump(self.p_memsz)
+ print " ('p_align', %s)" % common_dump.HexDump(self.p_align)
+ print " ),"
+
class Section:
def __init__(self, f):
self.sh_name = f.read32()
@@ -189,19 +214,23 @@ def dumpELF(path, opts):
print "('e_machine', %s)" % common_dump.HexDump(e_machine)
print "('e_version', %s)" % common_dump.HexDump(f.read32())
print "('e_entry', %s)" % common_dump.HexDump(f.readWord())
- print "('e_phoff', %s)" % common_dump.HexDump(f.readWord())
+ e_phoff = f.readWord()
+ print "('e_phoff', %s)" % common_dump.HexDump(e_phoff)
e_shoff = f.readWord()
print "('e_shoff', %s)" % common_dump.HexDump(e_shoff)
print "('e_flags', %s)" % common_dump.HexDump(f.read32())
print "('e_ehsize', %s)" % common_dump.HexDump(f.read16())
- print "('e_phentsize', %s)" % common_dump.HexDump(f.read16())
- print "('e_phnum', %s)" % common_dump.HexDump(f.read16())
+ e_phentsize = f.read16()
+ print "('e_phentsize', %s)" % common_dump.HexDump(e_phentsize)
+ e_phnum = f.read16()
+ print "('e_phnum', %s)" % common_dump.HexDump(e_phnum)
e_shentsize = f.read16()
print "('e_shentsize', %s)" % common_dump.HexDump(e_shentsize)
e_shnum = f.read16()
print "('e_shnum', %s)" % common_dump.HexDump(e_shnum)
e_shstrndx = f.read16()
print "('e_shstrndx', %s)" % common_dump.HexDump(e_shstrndx)
+
# Read all section headers
sections = []
@@ -228,6 +257,19 @@ def dumpELF(path, opts):
sections[index].dump(shstrtab, f, strtab, opts.dumpSectionData)
print "])"
+ # Read all program headers
+ headers = []
+ for index in range(e_phnum[0]):
+ f.seek(e_phoff[0] + index * e_phentsize[0])
+ h = ProgramHeader(f)
+ headers.append(h)
+
+ print "('_ProgramHeaders', ["
+ for index in range(e_phnum[0]):
+ print " # Program Header %s" % index
+ headers[index].dump()
+ print "])"
+
if __name__ == "__main__":
from optparse import OptionParser, OptionGroup
parser = OptionParser("usage: %prog [options] {files}")
diff --git a/test/Transforms/BBVectorize/X86/cmp-types.ll b/test/Transforms/BBVectorize/X86/cmp-types.ll
new file mode 100644
index 0000000000..a4fcbb6048
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/cmp-types.ll
@@ -0,0 +1,16 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%"struct.btSoftBody" = type { float, float, float*, i8 }
+
+define void @test1(%"struct.btSoftBody"* %n1, %"struct.btSoftBody"* %n2) uwtable align 2 {
+entry:
+ %tobool15 = icmp ne %"struct.btSoftBody"* %n1, null
+ %cond16 = zext i1 %tobool15 to i32
+ %tobool21 = icmp ne %"struct.btSoftBody"* %n2, null
+ %cond22 = zext i1 %tobool21 to i32
+ ret void
+; CHECK: @test1
+}
+
diff --git a/test/Transforms/BBVectorize/X86/sh-rec.ll b/test/Transforms/BBVectorize/X86/sh-rec.ll
new file mode 100644
index 0000000000..1e0492c2a8
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-rec.ll
@@ -0,0 +1,54 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define void @ptoa() nounwind uwtable {
+entry:
+ %call = call i8* @malloc() nounwind
+ br i1 undef, label %return, label %if.end10
+
+if.end10: ; preds = %entry
+ %incdec.ptr = getelementptr inbounds i8* %call, i64 undef
+ %call17 = call i32 @ptou() nounwind
+ %incdec.ptr26.1 = getelementptr inbounds i8* %incdec.ptr, i64 -2
+ store i8 undef, i8* %incdec.ptr26.1, align 1
+ %div27.1 = udiv i32 %call17, 100
+ %rem.2 = urem i32 %div27.1, 10
+ %add2230.2 = or i32 %rem.2, 48
+ %conv25.2 = trunc i32 %add2230.2 to i8
+ %incdec.ptr26.2 = getelementptr inbounds i8* %incdec.ptr, i64 -3
+ store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
+ %incdec.ptr26.3 = getelementptr inbounds i8* %incdec.ptr, i64 -4
+ store i8 undef, i8* %incdec.ptr26.3, align 1
+ %div27.3 = udiv i32 %call17, 10000
+ %rem.4 = urem i32 %div27.3, 10
+ %add2230.4 = or i32 %rem.4, 48
+ %conv25.4 = trunc i32 %add2230.4 to i8
+ %incdec.ptr26.4 = getelementptr inbounds i8* %incdec.ptr, i64 -5
+ store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
+ %div27.4 = udiv i32 %call17, 100000
+ %rem.5 = urem i32 %div27.4, 10
+ %add2230.5 = or i32 %rem.5, 48
+ %conv25.5 = trunc i32 %add2230.5 to i8
+ %incdec.ptr26.5 = getelementptr inbounds i8* %incdec.ptr, i64 -6
+ store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
+ %incdec.ptr26.6 = getelementptr inbounds i8* %incdec.ptr, i64 -7
+ store i8 0, i8* %incdec.ptr26.6, align 1
+ %incdec.ptr26.7 = getelementptr inbounds i8* %incdec.ptr, i64 -8
+ store i8 undef, i8* %incdec.ptr26.7, align 1
+ %div27.7 = udiv i32 %call17, 100000000
+ %rem.8 = urem i32 %div27.7, 10
+ %add2230.8 = or i32 %rem.8, 48
+ %conv25.8 = trunc i32 %add2230.8 to i8
+ %incdec.ptr26.8 = getelementptr inbounds i8* %incdec.ptr, i64 -9
+ store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
+ unreachable
+
+return: ; preds = %entry
+ ret void
+; CHECK: @ptoa
+}
+
+declare noalias i8* @malloc() nounwind
+
+declare i32 @ptou()
diff --git a/test/Transforms/BBVectorize/X86/sh-rec2.ll b/test/Transforms/BBVectorize/X86/sh-rec2.ll
new file mode 100644
index 0000000000..ef2239932f
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-rec2.ll
@@ -0,0 +1,85 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
+
+define void @gsm_encode(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i8* %c) nounwind uwtable {
+entry:
+ %xmc = alloca [52 x i16], align 16
+ %arraydecay5 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 0
+ call void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i16* undef, i16* null, i16* undef, i16* undef, i16* undef, i16* %arraydecay5) nounwind
+ %incdec.ptr136 = getelementptr inbounds i8* %c, i64 10
+ %incdec.ptr157 = getelementptr inbounds i8* %c, i64 11
+ store i8 0, i8* %incdec.ptr136, align 1
+ %arrayidx162 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 11
+ %0 = load i16* %arrayidx162, align 2
+ %conv1631 = trunc i16 %0 to i8
+ %and164 = shl i8 %conv1631, 3
+ %shl165 = and i8 %and164, 56
+ %incdec.ptr172 = getelementptr inbounds i8* %c, i64 12
+ store i8 %shl165, i8* %incdec.ptr157, align 1
+ %1 = load i16* inttoptr (i64 2 to i16*), align 2
+ %conv1742 = trunc i16 %1 to i8
+ %and175 = shl i8 %conv1742, 1
+ %incdec.ptr183 = getelementptr inbounds i8* %c, i64 13
+ store i8 %and175, i8* %incdec.ptr172, align 1
+ %incdec.ptr199 = getelementptr inbounds i8* %c, i64 14
+ store i8 0, i8* %incdec.ptr183, align 1
+ %arrayidx214 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 15
+ %incdec.ptr220 = getelementptr inbounds i8* %c, i64 15
+ store i8 0, i8* %incdec.ptr199, align 1
+ %2 = load i16* %arrayidx214, align 2
+ %conv2223 = trunc i16 %2 to i8
+ %and223 = shl i8 %conv2223, 6
+ %incdec.ptr235 = getelementptr inbounds i8* %c, i64 16
+ store i8 %and223, i8* %incdec.ptr220, align 1
+ %arrayidx240 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 19
+ %3 = load i16* %arrayidx240, align 2
+ %conv2414 = trunc i16 %3 to i8
+ %and242 = shl i8 %conv2414, 2
+ %shl243 = and i8 %and242, 28
+ %incdec.ptr251 = getelementptr inbounds i8* %c, i64 17
+ store i8 %shl243, i8* %incdec.ptr235, align 1
+ %incdec.ptr272 = getelementptr inbounds i8* %c, i64 18
+ store i8 0, i8* %incdec.ptr251, align 1
+ %arrayidx282 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 25
+ %4 = load i16* %arrayidx282, align 2
+ %conv2835 = trunc i16 %4 to i8
+ %and284 = and i8 %conv2835, 7
+ %incdec.ptr287 = getelementptr inbounds i8* %c, i64 19
+ store i8 %and284, i8* %incdec.ptr272, align 1
+ %incdec.ptr298 = getelementptr inbounds i8* %c, i64 20
+ store i8 0, i8* %incdec.ptr287, align 1
+ %incdec.ptr314 = getelementptr inbounds i8* %c, i64 21
+ store i8 0, i8* %incdec.ptr298, align 1
+ %arrayidx319 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 26
+ %5 = load i16* %arrayidx319, align 4
+ %conv3206 = trunc i16 %5 to i8
+ %and321 = shl i8 %conv3206, 4
+ %shl322 = and i8 %and321, 112
+ %incdec.ptr335 = getelementptr inbounds i8* %c, i64 22
+ store i8 %shl322, i8* %incdec.ptr314, align 1
+ %arrayidx340 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 29
+ %6 = load i16* %arrayidx340, align 2
+ %conv3417 = trunc i16 %6 to i8
+ %and342 = shl i8 %conv3417, 3
+ %shl343 = and i8 %and342, 56
+ %incdec.ptr350 = getelementptr inbounds i8* %c, i64 23
+ store i8 %shl343, i8* %incdec.ptr335, align 1
+ %incdec.ptr366 = getelementptr inbounds i8* %c, i64 24
+ store i8 0, i8* %incdec.ptr350, align 1
+ %arrayidx381 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 36
+ %incdec.ptr387 = getelementptr inbounds i8* %c, i64 25
+ store i8 0, i8* %incdec.ptr366, align 1
+ %7 = load i16* %arrayidx381, align 8
+ %conv3898 = trunc i16 %7 to i8
+ %and390 = shl i8 %conv3898, 6
+ store i8 %and390, i8* %incdec.ptr387, align 1
+ unreachable
+; CHECK: @gsm_encode
+}
+
+declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
+
+declare void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-rec3.ll b/test/Transforms/BBVectorize/X86/sh-rec3.ll
new file mode 100644
index 0000000000..fd2cc8bdd9
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-rec3.ll
@@ -0,0 +1,170 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
+
+define void @gsm_encode(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i8* %c) nounwind uwtable {
+entry:
+ %LARc28 = alloca [2 x i64], align 16
+ %LARc28.sub = getelementptr inbounds [2 x i64]* %LARc28, i64 0, i64 0
+ %tmpcast = bitcast [2 x i64]* %LARc28 to [8 x i16]*
+ %Nc = alloca [4 x i16], align 2
+ %Mc = alloca [4 x i16], align 2
+ %bc = alloca [4 x i16], align 2
+ %xmc = alloca [52 x i16], align 16
+ %arraydecay = bitcast [2 x i64]* %LARc28 to i16*
+ %arraydecay1 = getelementptr inbounds [4 x i16]* %Nc, i64 0, i64 0
+ %arraydecay2 = getelementptr inbounds [4 x i16]* %bc, i64 0, i64 0
+ %arraydecay3 = getelementptr inbounds [4 x i16]* %Mc, i64 0, i64 0
+ %arraydecay5 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 0
+ call void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i16* %arraydecay, i16* %arraydecay1, i16* %arraydecay2, i16* %arraydecay3, i16* undef, i16* %arraydecay5) nounwind
+ %0 = load i64* %LARc28.sub, align 16
+ %1 = trunc i64 %0 to i32
+ %conv1 = lshr i32 %1, 2
+ %and = and i32 %conv1, 15
+ %or = or i32 %and, 208
+ %conv6 = trunc i32 %or to i8
+ %incdec.ptr = getelementptr inbounds i8* %c, i64 1
+ store i8 %conv6, i8* %c, align 1
+ %conv84 = trunc i64 %0 to i8
+ %and9 = shl i8 %conv84, 6
+ %incdec.ptr15 = getelementptr inbounds i8* %c, i64 2
+ store i8 %and9, i8* %incdec.ptr, align 1
+ %2 = lshr i64 %0, 50
+ %shr226.tr = trunc i64 %2 to i8
+ %conv25 = and i8 %shr226.tr, 7
+ %incdec.ptr26 = getelementptr inbounds i8* %c, i64 3
+ store i8 %conv25, i8* %incdec.ptr15, align 1
+ %incdec.ptr42 = getelementptr inbounds i8* %c, i64 4
+ store i8 0, i8* %incdec.ptr26, align 1
+ %arrayidx52 = getelementptr inbounds [8 x i16]* %tmpcast, i64 0, i64 7
+ %3 = load i16* %arrayidx52, align 2
+ %conv537 = trunc i16 %3 to i8
+ %and54 = and i8 %conv537, 7
+ %incdec.ptr57 = getelementptr inbounds i8* %c, i64 5
+ store i8 %and54, i8* %incdec.ptr42, align 1
+ %incdec.ptr68 = getelementptr inbounds i8* %c, i64 6
+ store i8 0, i8* %incdec.ptr57, align 1
+ %4 = load i16* %arraydecay3, align 2
+ %conv748 = trunc i16 %4 to i8
+ %and75 = shl i8 %conv748, 5
+ %shl76 = and i8 %and75, 96
+ %incdec.ptr84 = getelementptr inbounds i8* %c, i64 7
+ store i8 %shl76, i8* %incdec.ptr68, align 1
+ %arrayidx94 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 1
+ %5 = load i16* %arrayidx94, align 2
+ %conv959 = trunc i16 %5 to i8
+ %and96 = shl i8 %conv959, 1
+ %shl97 = and i8 %and96, 14
+ %or103 = or i8 %shl97, 1
+ %incdec.ptr105 = getelementptr inbounds i8* %c, i64 8
+ store i8 %or103, i8* %incdec.ptr84, align 1
+ %arrayidx115 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 4
+ %6 = bitcast i16* %arrayidx115 to i32*
+ %7 = load i32* %6, align 8
+ %conv11610 = trunc i32 %7 to i8
+ %and117 = and i8 %conv11610, 7
+ %incdec.ptr120 = getelementptr inbounds i8* %c, i64 9
+ store i8 %and117, i8* %incdec.ptr105, align 1
+ %8 = lshr i32 %7, 16
+ %and12330 = shl nuw nsw i32 %8, 5
+ %and123 = trunc i32 %and12330 to i8
+ %incdec.ptr136 = getelementptr inbounds i8* %c, i64 10
+ store i8 %and123, i8* %incdec.ptr120, align 1
+ %incdec.ptr157 = getelementptr inbounds i8* %c, i64 11
+ store i8 0, i8* %incdec.ptr136, align 1
+ %incdec.ptr172 = getelementptr inbounds i8* %c, i64 12
+ store i8 0, i8* %incdec.ptr157, align 1
+ %arrayidx173 = getelementptr inbounds [4 x i16]* %Nc, i64 0, i64 1
+ %9 = load i16* %arrayidx173, align 2
+ %conv17412 = zext i16 %9 to i32
+ %and175 = shl nuw nsw i32 %conv17412, 1
+ %arrayidx177 = getelementptr inbounds [4 x i16]* %bc, i64 0, i64 1
+ %10 = load i16* %arrayidx177, align 2
+ %conv17826 = zext i16 %10 to i32
+ %shr17913 = lshr i32 %conv17826, 1
+ %and180 = and i32 %shr17913, 1
+ %or181 = or i32 %and175, %and180
+ %conv182 = trunc i32 %or181 to i8
+ %incdec.ptr183 = getelementptr inbounds i8* %c, i64 13
+ store i8 %conv182, i8* %incdec.ptr172, align 1
+ %arrayidx188 = getelementptr inbounds [4 x i16]* %Mc, i64 0, i64 1
+ %11 = load i16* %arrayidx188, align 2
+ %conv18914 = trunc i16 %11 to i8
+ %and190 = shl i8 %conv18914, 5
+ %shl191 = and i8 %and190, 96
+ %incdec.ptr199 = getelementptr inbounds i8* %c, i64 14
+ store i8 %shl191, i8* %incdec.ptr183, align 1
+ %arrayidx209 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 14
+ %12 = load i16* %arrayidx209, align 4
+ %conv21015 = trunc i16 %12 to i8
+ %and211 = shl i8 %conv21015, 1
+ %shl212 = and i8 %and211, 14
+ %or218 = or i8 %shl212, 1
+ %incdec.ptr220 = getelementptr inbounds i8* %c, i64 15
+ store i8 %or218, i8* %incdec.ptr199, align 1
+ %arrayidx225 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 16
+ %13 = bitcast i16* %arrayidx225 to i64*
+ %14 = load i64* %13, align 16
+ %conv22616 = trunc i64 %14 to i8
+ %and227 = shl i8 %conv22616, 3
+ %shl228 = and i8 %and227, 56
+ %incdec.ptr235 = getelementptr inbounds i8* %c, i64 16
+ store i8 %shl228, i8* %incdec.ptr220, align 1
+ %15 = lshr i64 %14, 32
+ %and23832 = shl nuw nsw i64 %15, 5
+ %and238 = trunc i64 %and23832 to i8
+ %incdec.ptr251 = getelementptr inbounds i8* %c, i64 17
+ store i8 %and238, i8* %incdec.ptr235, align 1
+ %arrayidx266 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 23
+ %incdec.ptr272 = getelementptr inbounds i8* %c, i64 18
+ store i8 0, i8* %incdec.ptr251, align 1
+ %16 = load i16* %arrayidx266, align 2
+ %conv27418 = trunc i16 %16 to i8
+ %and275 = shl i8 %conv27418, 6
+ %incdec.ptr287 = getelementptr inbounds i8* %c, i64 19
+ store i8 %and275, i8* %incdec.ptr272, align 1
+ %arrayidx288 = getelementptr inbounds [4 x i16]* %Nc, i64 0, i64 2
+ %17 = load i16* %arrayidx288, align 2
+ %conv28919 = zext i16 %17 to i32
+ %and290 = shl nuw nsw i32 %conv28919, 1
+ %arrayidx292 = getelementptr inbounds [4 x i16]* %bc, i64 0, i64 2
+ %18 = load i16* %arrayidx292, align 2
+ %conv29327 = zext i16 %18 to i32
+ %shr29420 = lshr i32 %conv29327, 1
+ %and295 = and i32 %shr29420, 1
+ %or296 = or i32 %and290, %and295
+ %conv297 = trunc i32 %or296 to i8
+ %incdec.ptr298 = getelementptr inbounds i8* %c, i64 20
+ store i8 %conv297, i8* %incdec.ptr287, align 1
+ %conv30021 = trunc i16 %18 to i8
+ %and301 = shl i8 %conv30021, 7
+ %incdec.ptr314 = getelementptr inbounds i8* %c, i64 21
+ store i8 %and301, i8* %incdec.ptr298, align 1
+ %incdec.ptr335 = getelementptr inbounds i8* %c, i64 22
+ store i8 0, i8* %incdec.ptr314, align 1
+ %arrayidx340 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 29
+ %19 = load i16* %arrayidx340, align 2
+ %conv34122 = trunc i16 %19 to i8
+ %and342 = shl i8 %conv34122, 3
+ %shl343 = and i8 %and342, 56
+ %incdec.ptr350 = getelementptr inbounds i8* %c, i64 23
+ store i8 %shl343, i8* %incdec.ptr335, align 1
+ %arrayidx355 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 32
+ %20 = bitcast i16* %arrayidx355 to i32*
+ %21 = load i32* %20, align 16
+ %conv35623 = shl i32 %21, 2
+ %shl358 = and i32 %conv35623, 28
+ %22 = lshr i32 %21, 17
+ %and363 = and i32 %22, 3
+ %or364 = or i32 %shl358, %and363
+ %conv365 = trunc i32 %or364 to i8
+ store i8 %conv365, i8* %incdec.ptr350, align 1
+ unreachable
+; CHECK: @gsm_encode
+}
+
+declare void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
+
+declare void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-types.ll b/test/Transforms/BBVectorize/X86/sh-types.ll
new file mode 100644
index 0000000000..0bcb714d5e
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-types.ll
@@ -0,0 +1,25 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %C2, double %D1, double %D2) {
+ %A2 = shufflevector <4 x float> %A1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+ %B2 = shufflevector <4 x float> %B1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+ %X1 = shufflevector <4 x float> %A2, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %X2 = shufflevector <4 x float> %B2, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ %Y1 = shufflevector <2 x float> %X1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %Y2 = shufflevector <2 x float> %X2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+
+ %M1 = fsub double %C1, %D1
+ %M2 = fsub double %C2, %D2
+ %N1 = fmul double %M1, %C1
+ %N2 = fmul double %M2, %C2
+ %Z1 = fadd double %N1, %D1
+ %Z2 = fadd double %N2, %D2
+
+ %R = fmul <4 x float> %Y1, %Y2
+ ret <4 x float> %R
+; CHECK: @test7
+; CHECK-NOT: <8 x float>
+; CHECK: ret <4 x float>
+}
+
diff --git a/test/Transforms/GlobalOpt/blockaddress.ll b/test/Transforms/GlobalOpt/blockaddress.ll
new file mode 100644
index 0000000000..13da76299d
--- /dev/null
+++ b/test/Transforms/GlobalOpt/blockaddress.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+@x = internal global i8* zeroinitializer
+
+define void @f() {
+; CHECK: @f
+
+; Check that we don't hit an assert in Constant::IsThreadDependent()
+; when storing this blockaddress into a global.
+
+ store i8* blockaddress(@g, %here), i8** @x, align 8
+ ret void
+}
+
+define void @g() {
+; CHECK: @g
+
+here:
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/tls.ll b/test/Transforms/GlobalOpt/tls.ll
new file mode 100644
index 0000000000..7a410e5ed2
--- /dev/null
+++ b/test/Transforms/GlobalOpt/tls.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+declare void @wait()
+declare void @signal()
+declare void @start_thread(void ()*)
+
+@x = internal thread_local global [100 x i32] zeroinitializer, align 16
+@ip = internal global i32* null, align 8
+
+; PR14309: GlobalOpt would think that the value of @ip is always the address of
+; x[1]. However, that address is different for different threads so @ip cannot
+; be replaced with a constant.
+
+define i32 @f() {
+entry:
+ ; Set @ip to point to x[1] for thread 1.
+ store i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), i32** @ip, align 8
+
+ ; Run g on a new thread.
+ tail call void @start_thread(void ()* @g) nounwind
+ tail call void @wait() nounwind
+
+ ; Reset x[1] for thread 1.
+ store i32 0, i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), align 4
+
+ ; Read the value of @ip, which now points at x[1] for thread 2.
+ %0 = load i32** @ip, align 8
+
+ %1 = load i32* %0, align 4
+ ret i32 %1
+
+; CHECK: @f
+; Make sure that the load from @ip hasn't been removed.
+; CHECK: load i32** @ip
+; CHECK: ret
+}
+
+define internal void @g() nounwind uwtable {
+entry:
+ ; Set @ip to point to x[1] for thread 2.
+ store i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), i32** @ip, align 8
+
+ ; Store 50 in x[1] for thread 2.
+ store i32 50, i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), align 4
+
+ tail call void @signal() nounwind
+ ret void
+
+; CHECK: @g
+; Make sure that the store to @ip hasn't been removed.
+; CHECK: store {{.*}} @ip
+; CHECK: ret
+}
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 2e0f70ce46..ed0514b08e 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -2,7 +2,7 @@
; CHECK-NOT: and
; CHECK-NOT: zext
-target datalayout = "-p:64:64:64-n32:64"
+target datalayout = "p:64:64:64-n32:64"
define void @foo(double* %d, i64 %n) nounwind {
entry:
diff --git a/test/Transforms/Inline/lifetime-no-datalayout.ll b/test/Transforms/Inline/lifetime-no-datalayout.ll
new file mode 100644
index 0000000000..9ad14282f9
--- /dev/null
+++ b/test/Transforms/Inline/lifetime-no-datalayout.ll
@@ -0,0 +1,23 @@
+; RUN: opt -inline %s -S -o - | FileCheck %s
+
+declare void @use(i8* %a)
+
+define void @helper() {
+ %a = alloca i8
+ call void @use(i8* %a)
+ ret void
+}
+
+; No target datalayout is given, so the size in llvm.lifetime.X should be -1 (unknown).
+define void @test() {
+; CHECK: @test
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 -1
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 -1
+ call void @helper()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+ ret void
+}
+
diff --git a/test/Transforms/Inline/lifetime.ll b/test/Transforms/Inline/lifetime.ll
index a95c836b77..fb520498c4 100644
--- a/test/Transforms/Inline/lifetime.ll
+++ b/test/Transforms/Inline/lifetime.ll
@@ -1,22 +1,25 @@
; RUN: opt -inline %s -S -o - | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
declare void @llvm.lifetime.start(i64, i8*)
declare void @llvm.lifetime.end(i64, i8*)
define void @helper_both_markers() {
%a = alloca i8
- call void @llvm.lifetime.start(i64 1, i8* %a)
- call void @llvm.lifetime.end(i64 1, i8* %a)
+ ; Size in llvm.lifetime.start / llvm.lifetime.end differs from
+ ; allocation size. We should use the former.
+ call void @llvm.lifetime.start(i64 2, i8* %a)
+ call void @llvm.lifetime.end(i64 2, i8* %a)
ret void
}
define void @test_both_markers() {
; CHECK: @test_both_markers
-; CHECK: llvm.lifetime.start(i64 1
-; CHECK-NEXT: llvm.lifetime.end(i64 1
+; CHECK: llvm.lifetime.start(i64 2
+; CHECK-NEXT: llvm.lifetime.end(i64 2
call void @helper_both_markers()
-; CHECK-NEXT: llvm.lifetime.start(i64 1
-; CHECK-NEXT: llvm.lifetime.end(i64 1
+; CHECK-NEXT: llvm.lifetime.start(i64 2
+; CHECK-NEXT: llvm.lifetime.end(i64 2
call void @helper_both_markers()
; CHECK-NEXT: ret void
ret void
@@ -27,7 +30,7 @@ define void @test_both_markers() {
declare void @use(i8* %a)
define void @helper_no_markers() {
- %a = alloca i8
+ %a = alloca i8 ; Allocation size is 1 byte.
call void @use(i8* %a)
ret void
}
@@ -37,14 +40,14 @@ define void @helper_no_markers() {
define void @test_no_marker() {
; CHECK: @test_no_marker
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 -1
+; CHECK: llvm.lifetime.start(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 -1
+; CHECK: llvm.lifetime.end(i64 1
call void @helper_no_markers()
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.start(i64 -1
+; CHECK: llvm.lifetime.start(i64 1
; CHECK-NOT: lifetime
-; CHECK: llvm.lifetime.end(i64 -1
+; CHECK: llvm.lifetime.end(i64 1
call void @helper_no_markers()
; CHECK-NOT: lifetime
; CHECK: ret void
@@ -76,3 +79,22 @@ define void @test_two_casts() {
; CHECK: ret void
ret void
}
+
+define void @helper_arrays_alloca() {
+ %a = alloca [10 x i32], align 16
+ %1 = bitcast [10 x i32]* %a to i8*
+ call void @use(i8* %1)
+ ret void
+}
+
+define void @test_arrays_alloca() {
+; CHECK: @test_arrays_alloca
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.start(i64 40,
+; CHECK-NOT: lifetime
+; CHECK: llvm.lifetime.end(i64 40,
+ call void @helper_arrays_alloca()
+; CHECK-NOT: lifetime
+; CHECK: ret void
+ ret void
+}
diff --git a/test/Transforms/InstCombine/abs-1.ll b/test/Transforms/InstCombine/abs-1.ll
new file mode 100644
index 0000000000..807f238755
--- /dev/null
+++ b/test/Transforms/InstCombine/abs-1.ll
@@ -0,0 +1,41 @@
+; Test that the abs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @abs(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+
+; Check abs(x) -> x >s -1 ? x : -x.
+
+define i32 @test_simplify1(i32 %x) {
+; CHECK: @test_simplify1
+ %ret = call i32 @abs(i32 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i32 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i32 %x, i32 [[NEG]]
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[RET]]
+}
+
+define i64 @test_simplify2(i64 %x) {
+; CHECK: @test_simplify2
+ %ret = call i64 @labs(i64 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i64 %x, i64 [[NEG]]
+ ret i64 %ret
+; CHECK-NEXT: ret i64 [[RET]]
+}
+
+define i64 @test_simplify3(i64 %x) {
+; CHECK: @test_simplify3
+ %ret = call i64 @llabs(i64 %x)
+; CHECK-NEXT: [[ISPOS:%[a-z0-9]+]] = icmp sgt i64 %x, -1
+; CHECK-NEXT: [[NEG:%[a-z0-9]+]] = sub i64 0, %x
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[ISPOS]], i64 %x, i64 [[NEG]]
+ ret i64 %ret
+; CHECK-NEXT: ret i64 [[RET]]
+}
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index d4a5d42991..c3ef2dbb70 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -8,7 +8,7 @@
; CHECK: %q = add i64 %r, 1
; CHECK: ret i64 %q
-target datalayout = "-i32:8:32"
+target datalayout = "i32:8:32"
@A = external global i32
@B = weak_odr global i32 0
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 50e03479f6..68a671cec8 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -94,3 +94,19 @@ entry:
tail call void @f(i32* %b)
ret void
}
+
+; PR14371
+%opaque_type = type opaque
+%real_type = type { { i32, i32* } }
+
+@opaque_global = external constant %opaque_type, align 4
+
+define void @test7() {
+entry:
+ %0 = alloca %real_type, align 4
+ %1 = bitcast %real_type* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/cos-1.ll b/test/Transforms/InstCombine/cos-1.ll
new file mode 100644
index 0000000000..b92e448abd
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-1.ll
@@ -0,0 +1,38 @@
+; Test that the cos library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NO-FLOAT-SHRINK
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s -check-prefix=DO-FLOAT-SHRINK
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare double @cos(double)
+
+; Check cos(-x) -> cos(x).
+
+define double @test_simplify1(double %d) {
+; NO-FLOAT-SHRINK: @test_simplify1
+ %neg = fsub double -0.000000e+00, %d
+ %cos = call double @cos(double %neg)
+; NO-FLOAT-SHRINK: call double @cos(double %d)
+ ret double %cos
+}
+
+define float @test_simplify2(float %f) {
+; DO-FLOAT-SHRINK: @test_simplify2
+ %conv1 = fpext float %f to double
+ %neg = fsub double -0.000000e+00, %conv1
+ %cos = call double @cos(double %neg)
+ %conv2 = fptrunc double %cos to float
+; DO-FLOAT-SHRINK: call float @cosf(float %f)
+ ret float %conv2
+}
+
+define float @test_simplify3(float %f) {
+; NO-FLOAT-SHRINK: @test_simplify3
+ %conv1 = fpext float %f to double
+ %neg = fsub double -0.000000e+00, %conv1
+ %cos = call double @cos(double %neg)
+; NO-FLOAT-SHRINK: call double @cos(double %conv1)
+ %conv2 = fptrunc double %cos to float
+ ret float %conv2
+}
diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll
new file mode 100644
index 0000000000..2f2dfafe48
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-2.ll
@@ -0,0 +1,17 @@
+; Test that the cos library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare float @cos(double)
+
+; Check that cos functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %d) {
+; CHECK: @test_no_simplify1
+ %neg = fsub double -0.000000e+00, %d
+ %cos = call float @cos(double %neg)
+; CHECK: call float @cos(double %neg)
+ ret float %cos
+}
diff --git a/test/Transforms/SimplifyLibCalls/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index b668e4b9d3..084efdc989 100644
--- a/test/Transforms/SimplifyLibCalls/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplify-libcalls -S < %s | FileCheck %s
+; RUN: opt -instcombine -S < %s | FileCheck %s
@.str = private constant [3 x i8] c"%c\00"
diff --git a/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
index d81e9ae5bd..c2c29368b1 100644
--- a/test/Transforms/InstCombine/disable-simplify-libcalls.ll
+++ b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
@@ -37,6 +37,18 @@ declare i64 @strtoll(i8*, i8**, i32)
declare i64 @strtoul(i8*, i8**, i32)
declare i64 @strtoull(i8*, i8**, i32)
declare i64 @strcspn(i8*, i8*)
+declare i32 @abs(i32)
+declare i32 @ffs(i32)
+declare i32 @ffsl(i64)
+declare i32 @ffsll(i64)
+declare i32 @fprintf(i8*, i8*)
+declare i32 @isascii(i32)
+declare i32 @isdigit(i32)
+declare i32 @toascii(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+declare i32 @printf(i8*)
+declare i32 @sprintf(i8*, i8*)
define double @t1(double %x) {
; CHECK: @t1
@@ -234,3 +246,90 @@ define i64 @t25(i8* %y) {
ret i64 %ret
; CHECK: call i64 @strcspn
}
+
+define i32 @t26(i32 %y) {
+; CHECK: @t26
+ %ret = call i32 @abs(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @abs
+}
+
+define i32 @t27(i32 %y) {
+; CHECK: @t27
+ %ret = call i32 @ffs(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffs
+}
+
+define i32 @t28(i64 %y) {
+; CHECK: @t28
+ %ret = call i32 @ffsl(i64 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffsl
+}
+
+define i32 @t29(i64 %y) {
+; CHECK: @t29
+ %ret = call i32 @ffsll(i64 %y)
+ ret i32 %ret
+; CHECK: call i32 @ffsll
+}
+
+define void @t30() {
+; CHECK: @t30
+ %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+ call i32 @fprintf(i8* null, i8* %x)
+ ret void
+; CHECK: call i32 @fprintf
+}
+
+define i32 @t31(i32 %y) {
+; CHECK: @t31
+ %ret = call i32 @isascii(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @isascii
+}
+
+define i32 @t32(i32 %y) {
+; CHECK: @t32
+ %ret = call i32 @isdigit(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @isdigit
+}
+
+define i32 @t33(i32 %y) {
+; CHECK: @t33
+ %ret = call i32 @toascii(i32 %y)
+ ret i32 %ret
+; CHECK: call i32 @toascii
+}
+
+define i64 @t34(i64 %y) {
+; CHECK: @t34
+ %ret = call i64 @labs(i64 %y)
+ ret i64 %ret
+; CHECK: call i64 @labs
+}
+
+define i64 @t35(i64 %y) {
+; CHECK: @t35
+ %ret = call i64 @llabs(i64 %y)
+ ret i64 %ret
+; CHECK: call i64 @llabs
+}
+
+define void @t36() {
+; CHECK: @t36
+ %x = getelementptr inbounds [1 x i8]* @empty, i32 0, i32 0
+ call i32 @printf(i8* %x)
+ ret void
+; CHECK: call i32 @printf
+}
+
+define void @t37(i8* %x) {
+; CHECK: @t37
+ %y = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+ call i32 @sprintf(i8* %x, i8* %y)
+ ret void
+; CHECK: call i32 @sprintf
+}
diff --git a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index b4ab8b4ceb..e5448ee007 100644
--- a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,98 +1,98 @@
-; RUN: opt < %s -simplify-libcalls -enable-double-float-shrink -S | FileCheck %s
+; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define float @acos_test(float %f) nounwind readnone {
; CHECK: acos_test
- %conv = fpext float %f to double
- %call = call double @acos(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @acos(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @acosf(float %f)
}
define double @acos_test2(float %f) nounwind readnone {
; CHECK: acos_test2
- %conv = fpext float %f to double
- %call = call double @acos(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @acos(double %conv)
+ ret double %call
; CHECK: call double @acos(double %conv)
}
define float @acosh_test(float %f) nounwind readnone {
; CHECK: acosh_test
- %conv = fpext float %f to double
- %call = call double @acosh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @acosh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @acoshf(float %f)
}
define double @acosh_test2(float %f) nounwind readnone {
; CHECK: acosh_test2
- %conv = fpext float %f to double
- %call = call double @acosh(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @acosh(double %conv)
+ ret double %call
; CHECK: call double @acosh(double %conv)
}
define float @asin_test(float %f) nounwind readnone {
; CHECK: asin_test
- %conv = fpext float %f to double
- %call = call double @asin(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @asin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @asinf(float %f)
}
define double @asin_test2(float %f) nounwind readnone {
; CHECK: asin_test2
- %conv = fpext float %f to double
- %call = call double @asin(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @asin(double %conv)
+ ret double %call
; CHECK: call double @asin(double %conv)
}
define float @asinh_test(float %f) nounwind readnone {
; CHECK: asinh_test
- %conv = fpext float %f to double
- %call = call double @asinh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @asinh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @asinhf(float %f)
}
define double @asinh_test2(float %f) nounwind readnone {
; CHECK: asinh_test2
- %conv = fpext float %f to double
- %call = call double @asinh(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @asinh(double %conv)
+ ret double %call
; CHECK: call double @asinh(double %conv)
}
define float @atan_test(float %f) nounwind readnone {
; CHECK: atan_test
- %conv = fpext float %f to double
- %call = call double @atan(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @atan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @atanf(float %f)
}
define double @atan_test2(float %f) nounwind readnone {
; CHECK: atan_test2
- %conv = fpext float %f to double
- %call = call double @atan(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @atan(double %conv)
+ ret double %call
; CHECK: call double @atan(double %conv)
}
define float @atanh_test(float %f) nounwind readnone {
; CHECK: atanh_test
- %conv = fpext float %f to double
- %call = call double @atanh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @atanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @atanhf(float %f)
}
@@ -105,210 +105,210 @@ define double @atanh_test2(float %f) nounwind readnone {
}
define float @cbrt_test(float %f) nounwind readnone {
; CHECK: cbrt_test
- %conv = fpext float %f to double
- %call = call double @cbrt(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @cbrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @cbrtf(float %f)
}
define double @cbrt_test2(float %f) nounwind readnone {
; CHECK: cbrt_test2
- %conv = fpext float %f to double
- %call = call double @cbrt(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @cbrt(double %conv)
+ ret double %call
; CHECK: call double @cbrt(double %conv)
}
define float @exp_test(float %f) nounwind readnone {
; CHECK: exp_test
- %conv = fpext float %f to double
- %call = call double @exp(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @exp(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @expf(float %f)
}
define double @exp_test2(float %f) nounwind readnone {
; CHECK: exp_test2
- %conv = fpext float %f to double
- %call = call double @exp(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @exp(double %conv)
+ ret double %call
; CHECK: call double @exp(double %conv)
}
define float @expm1_test(float %f) nounwind readnone {
; CHECK: expm1_test
- %conv = fpext float %f to double
- %call = call double @expm1(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @expm1(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @expm1f(float %f)
}
define double @expm1_test2(float %f) nounwind readnone {
; CHECK: expm1_test2
- %conv = fpext float %f to double
- %call = call double @expm1(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @expm1(double %conv)
+ ret double %call
; CHECK: call double @expm1(double %conv)
}
define float @exp10_test(float %f) nounwind readnone {
; CHECK: exp10_test
- %conv = fpext float %f to double
- %call = call double @exp10(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @exp10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @exp10f(float %f)
}
define double @exp10_test2(float %f) nounwind readnone {
; CHECK: exp10_test2
- %conv = fpext float %f to double
- %call = call double @exp10(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @exp10(double %conv)
+ ret double %call
; CHECK: call double @exp10(double %conv)
}
define float @log_test(float %f) nounwind readnone {
; CHECK: log_test
- %conv = fpext float %f to double
- %call = call double @log(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @log(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @logf(float %f)
}
define double @log_test2(float %f) nounwind readnone {
; CHECK: log_test2
- %conv = fpext float %f to double
- %call = call double @log(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @log(double %conv)
+ ret double %call
; CHECK: call double @log(double %conv)
}
define float @log10_test(float %f) nounwind readnone {
; CHECK: log10_test
- %conv = fpext float %f to double
- %call = call double @log10(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @log10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @log10f(float %f)
}
define double @log10_test2(float %f) nounwind readnone {
; CHECK: log10_test2
- %conv = fpext float %f to double
- %call = call double @log10(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @log10(double %conv)
+ ret double %call
; CHECK: call double @log10(double %conv)
}
define float @log1p_test(float %f) nounwind readnone {
; CHECK: log1p_test
- %conv = fpext float %f to double
- %call = call double @log1p(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @log1p(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @log1pf(float %f)
}
define double @log1p_test2(float %f) nounwind readnone {
; CHECK: log1p_test2
- %conv = fpext float %f to double
- %call = call double @log1p(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @log1p(double %conv)
+ ret double %call
; CHECK: call double @log1p(double %conv)
}
define float @log2_test(float %f) nounwind readnone {
; CHECK: log2_test
- %conv = fpext float %f to double
- %call = call double @log2(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @log2(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @log2f(float %f)
}
define double @log2_test2(float %f) nounwind readnone {
; CHECK: log2_test2
- %conv = fpext float %f to double
- %call = call double @log2(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @log2(double %conv)
+ ret double %call
; CHECK: call double @log2(double %conv)
}
define float @logb_test(float %f) nounwind readnone {
; CHECK: logb_test
- %conv = fpext float %f to double
- %call = call double @logb(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @logb(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @logbf(float %f)
}
define double @logb_test2(float %f) nounwind readnone {
; CHECK: logb_test2
- %conv = fpext float %f to double
- %call = call double @logb(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @logb(double %conv)
+ ret double %call
; CHECK: call double @logb(double %conv)
}
define float @sin_test(float %f) nounwind readnone {
; CHECK: sin_test
- %conv = fpext float %f to double
- %call = call double @sin(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @sin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @sinf(float %f)
}
define double @sin_test2(float %f) nounwind readnone {
; CHECK: sin_test2
- %conv = fpext float %f to double
- %call = call double @sin(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @sin(double %conv)
+ ret double %call
; CHECK: call double @sin(double %conv)
}
define float @sqrt_test(float %f) nounwind readnone {
; CHECK: sqrt_test
- %conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @sqrtf(float %f)
}
define double @sqrt_test2(float %f) nounwind readnone {
; CHECK: sqrt_test2
- %conv = fpext float %f to double
- %call = call double @sqrt(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ ret double %call
; CHECK: call double @sqrt(double %conv)
}
define float @tan_test(float %f) nounwind readnone {
; CHECK: tan_test
- %conv = fpext float %f to double
- %call = call double @tan(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @tan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @tanf(float %f)
}
define double @tan_test2(float %f) nounwind readnone {
; CHECK: tan_test2
- %conv = fpext float %f to double
- %call = call double @tan(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @tan(double %conv)
+ ret double %call
; CHECK: call double @tan(double %conv)
}
define float @tanh_test(float %f) nounwind readnone {
; CHECK: tanh_test
- %conv = fpext float %f to double
- %call = call double @tanh(double %conv)
- %conv1 = fptrunc double %call to float
- ret float %conv1
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
; CHECK: call float @tanhf(float %f)
}
define double @tanh_test2(float %f) nounwind readnone {
; CHECK: tanh_test2
- %conv = fpext float %f to double
- %call = call double @tanh(double %conv)
- ret double %call
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ ret double %call
; CHECK: call double @tanh(double %conv)
}
diff --git a/test/Transforms/InstCombine/double-float-shrink-2.ll b/test/Transforms/InstCombine/double-float-shrink-2.ll
new file mode 100644
index 0000000000..7f6df92c96
--- /dev/null
+++ b/test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
+; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
+
+; DO-SIMPLIFY: call float @floorf(
+; DO-SIMPLIFY: call float @ceilf(
+; DO-SIMPLIFY: call float @roundf(
+; DO-SIMPLIFY: call float @nearbyintf(
+; DO-SIMPLIFY: call float @truncf(
+; DO-SIMPLIFY: call float @fabsf(
+
+; C89-SIMPLIFY: call float @floorf(
+; C89-SIMPLIFY: call float @ceilf(
+; C89-SIMPLIFY: call double @round(
+; C89-SIMPLIFY: call double @nearbyint(
+
+; DONT-SIMPLIFY: call double @floor(
+; DONT-SIMPLIFY: call double @ceil(
+; DONT-SIMPLIFY: call double @round(
+; DONT-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @trunc(
+; DONT-SIMPLIFY: call double @fabs(
+
+declare double @floor(double)
+declare double @ceil(double)
+declare double @round(double)
+declare double @nearbyint(double)
+declare double @trunc(double)
+declare double @fabs(double)
+
+define float @test_floor(float %C) {
+ %D = fpext float %C to double
+ ; --> floorf
+ %E = call double @floor(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_ceil(float %C) {
+ %D = fpext float %C to double
+ ; --> ceilf
+ %E = call double @ceil(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_round(float %C) {
+ %D = fpext float %C to double
+ ; --> roundf
+ %E = call double @round(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_nearbyint(float %C) {
+ %D = fpext float %C to double
+ ; --> nearbyintf
+ %E = call double @nearbyint(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_trunc(float %C) {
+ %D = fpext float %C to double
+ ; --> truncf
+ %E = call double @trunc(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_fabs(float %C) {
+ %D = fpext float %C to double
+ ; --> fabsf
+ %E = call double @fabs(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
new file mode 100644
index 0000000000..1b0ad50004
--- /dev/null
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -0,0 +1,76 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare double @exp2(double)
+declare float @exp2f(float)
+
+; Check exp2(sitofp(x)) -> ldexp(1.0, sext(x)).
+
+define double @test_simplify1(i32 %x) {
+; CHECK: @test_simplify1
+ %conv = sitofp i32 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify2(i16 signext %x) {
+; CHECK: @test_simplify2
+ %conv = sitofp i16 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify3(i8 signext %x) {
+; CHECK: @test_simplify3
+ %conv = sitofp i8 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify4(i32 %x) {
+; CHECK: @test_simplify4
+ %conv = sitofp i32 %x to float
+ %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+ ret float %ret
+}
+
+; Check exp2(uitofp(x)) -> ldexp(1.0, zext(x)) for x narrower than i32 (i32 itself is not simplified).
+
+define double @test_no_simplify1(i32 %x) {
+; CHECK: @test_no_simplify1
+ %conv = uitofp i32 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @exp2
+ ret double %ret
+}
+
+define double @test_simplify6(i16 zeroext %x) {
+; CHECK: @test_simplify6
+ %conv = uitofp i16 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define double @test_simplify7(i8 zeroext %x) {
+; CHECK: @test_simplify7
+ %conv = uitofp i8 %x to double
+ %ret = call double @exp2(double %conv)
+; CHECK: call double @ldexp
+ ret double %ret
+}
+
+define float @test_simplify8(i8 zeroext %x) {
+; CHECK: @test_simplify8
+ %conv = uitofp i8 %x to float
+ %ret = call float @exp2f(float %conv)
+; CHECK: call float @ldexpf
+ ret float %ret
+}
diff --git a/test/Transforms/InstCombine/exp2-2.ll b/test/Transforms/InstCombine/exp2-2.ll
new file mode 100644
index 0000000000..bed063798e
--- /dev/null
+++ b/test/Transforms/InstCombine/exp2-2.ll
@@ -0,0 +1,17 @@
+; Test that the exp2 library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare float @exp2(double)
+
+; Check that exp2 functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(i32 %x) {
+; CHECK: @test_no_simplify1
+ %conv = sitofp i32 %x to double
+ %ret = call float @exp2(double %conv)
+; CHECK: call float @exp2(double %conv)
+ ret float %ret
+}
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
new file mode 100644
index 0000000000..0510df3d24
--- /dev/null
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -0,0 +1,134 @@
+; Test that the ffs* library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=LINUX
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @ffs(i32)
+declare i32 @ffsl(i32)
+declare i32 @ffsll(i64)
+
+; Check ffs(0) -> 0.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @ffs(i32 0)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK-LINUX: @test_simplify2
+ %ret = call i32 @ffsl(i32 0)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3() {
+; CHECK-LINUX: @test_simplify3
+ %ret = call i32 @ffsll(i64 0)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 0
+}
+
+; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %ret = call i32 @ffs(i32 1)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+ %ret = call i32 @ffs(i32 2048)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 12
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+ %ret = call i32 @ffs(i32 65536)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 17
+}
+
+define i32 @test_simplify7() {
+; CHECK-LINUX: @test_simplify7
+ %ret = call i32 @ffsl(i32 65536)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 17
+}
+
+define i32 @test_simplify8() {
+; CHECK-LINUX: @test_simplify8
+ %ret = call i32 @ffsll(i64 1024)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 11
+}
+
+define i32 @test_simplify9() {
+; CHECK-LINUX: @test_simplify9
+ %ret = call i32 @ffsll(i64 65536)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 17
+}
+
+define i32 @test_simplify10() {
+; CHECK-LINUX: @test_simplify10
+ %ret = call i32 @ffsll(i64 17179869184)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 35
+}
+
+define i32 @test_simplify11() {
+; CHECK-LINUX: @test_simplify11
+ %ret = call i32 @ffsll(i64 281474976710656)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 49
+}
+
+define i32 @test_simplify12() {
+; CHECK-LINUX: @test_simplify12
+ %ret = call i32 @ffsll(i64 1152921504606846976)
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 61
+}
+
+; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
+
+define i32 @test_simplify13(i32 %x) {
+; CHECK: @test_simplify13
+ %ret = call i32 @ffs(i32 %x)
+; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[RET]]
+}
+
+define i32 @test_simplify14(i32 %x) {
+; CHECK-LINUX: @test_simplify14
+ %ret = call i32 @ffsl(i32 %x)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i32 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 [[RET]]
+}
+
+define i32 @test_simplify15(i64 %x) {
+; CHECK-LINUX: @test_simplify15
+ %ret = call i32 @ffsll(i64 %x)
+; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add i64 [[CTTZ]], 1
+; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
+; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
+; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
+ ret i32 %ret
+; CHECK-LINUX-NEXT: ret i32 [[RET]]
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index eaff87d695..8e064a4f2f 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -659,3 +659,21 @@ define i1 @test64(i8 %a, i32 %b) nounwind {
; CHECK-NEXT: %c = icmp eq i8 %1, %a
; CHECK-NEXT: ret i1 %c
}
+
+define i1 @test65(i64 %A, i64 %B) {
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %A, %B
+ %cmp = icmp eq i64 %s1, %s2
+; CHECK: @test65
+; CHECK-NEXT: ret i1 true
+ ret i1 %cmp
+}
+
+define i1 @test66(i64 %A, i64 %B) {
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %B, %A
+ %cmp = icmp eq i64 %s1, %s2
+; CHECK: @test66
+; CHECK-NEXT: ret i1 true
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/isascii-1.ll b/test/Transforms/InstCombine/isascii-1.ll
new file mode 100644
index 0000000000..2a413d89b4
--- /dev/null
+++ b/test/Transforms/InstCombine/isascii-1.ll
@@ -0,0 +1,32 @@
+; Test that the isascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isascii(i32)
+
+; Check isascii(c) -> c <u 128.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @isascii(i32 127)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %ret = call i32 @isascii(i32 128)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3(i32 %x) {
+; CHECK: @test_simplify3
+ %ret = call i32 @isascii(i32 %x)
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %x, 128
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/test/Transforms/InstCombine/isdigit-1.ll b/test/Transforms/InstCombine/isdigit-1.ll
new file mode 100644
index 0000000000..f291296c88
--- /dev/null
+++ b/test/Transforms/InstCombine/isdigit-1.ll
@@ -0,0 +1,48 @@
+; Test that the isdigit library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @isdigit(i32)
+
+; Check isdigit(c) -> (c - '0') <u 10.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @isdigit(i32 47)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %ret = call i32 @isdigit(i32 48)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+ %ret = call i32 @isdigit(i32 57)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %ret = call i32 @isdigit(i32 58)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5(i32 %x) {
+; CHECK: @test_simplify5
+
+ %ret = call i32 @isdigit(i32 %x)
+; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add i32 %x, -48
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 [[ADD]], 10
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i1 [[CMP]] to i32
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[ZEXT]]
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index 4238c5f8fb..c97b201fc0 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -59,7 +59,7 @@ define i32 @test_simplify5() {
%mem2 = getelementptr [4 x i8]* @foo, i32 0, i32 0
%ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
ret i32 %ret
-; CHECK: ret i32 {{[0-9]+}}
+; CHECK: ret i32 1
}
define i32 @test_simplify6() {
@@ -68,5 +68,5 @@ define i32 @test_simplify6() {
%mem2 = getelementptr [4 x i8]* @hel, i32 0, i32 0
%ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
ret i32 %ret
-; CHECK: ret i32 {{-[0-9]+}}
+; CHECK: ret i32 -1
}
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 83c893e17d..557b160a87 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -134,3 +134,13 @@ define void @test8() {
; CHECK: bar
ret void
}
+
+define void @test9() {
+ %A = alloca %U, align 4
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK: @test9
+; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U]* @H, i64 0, i64 1) to i8*))
+ ret void
+}
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
new file mode 100644
index 0000000000..c8e5f3806f
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -0,0 +1,152 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7251832
+
+; NOTE: The readonly attribute on the pow call should be preserved
+; in the cases below where pow is transformed into another function call.
+
+declare float @powf(float, float) nounwind readonly
+declare double @pow(double, double) nounwind readonly
+
+; Check pow(1.0, x) -> 1.0.
+
+define float @test_simplify1(float %x) {
+; CHECK: @test_simplify1
+ %retval = call float @powf(float 1.0, float %x)
+ ret float %retval
+; CHECK-NEXT: ret float 1.000000e+00
+}
+
+define double @test_simplify2(double %x) {
+; CHECK: @test_simplify2
+ %retval = call double @pow(double 1.0, double %x)
+ ret double %retval
+; CHECK-NEXT: ret double 1.000000e+00
+}
+
+; Check pow(2.0, x) -> exp2(x).
+
+define float @test_simplify3(float %x) {
+; CHECK: @test_simplify3
+ %retval = call float @powf(float 2.0, float %x)
+; CHECK-NEXT: [[EXP2F:%[a-z0-9]+]] = call float @exp2f(float %x) nounwind readonly
+ ret float %retval
+; CHECK-NEXT: ret float [[EXP2F]]
+}
+
+define double @test_simplify4(double %x) {
+; CHECK: @test_simplify4
+ %retval = call double @pow(double 2.0, double %x)
+; CHECK-NEXT: [[EXP2:%[a-z0-9]+]] = call double @exp2(double %x) nounwind readonly
+ ret double %retval
+; CHECK-NEXT: ret double [[EXP2]]
+}
+
+; Check pow(x, 0.0) -> 1.0.
+
+define float @test_simplify5(float %x) {
+; CHECK: @test_simplify5
+ %retval = call float @powf(float %x, float 0.0)
+ ret float %retval
+; CHECK-NEXT: ret float 1.000000e+00
+}
+
+define double @test_simplify6(double %x) {
+; CHECK: @test_simplify6
+ %retval = call double @pow(double %x, double 0.0)
+ ret double %retval
+; CHECK-NEXT: ret double 1.000000e+00
+}
+
+; Check pow(x, 0.5) -> fabs(sqrt(x)), where x != -infinity.
+
+define float @test_simplify7(float %x) {
+; CHECK: @test_simplify7
+ %retval = call float @powf(float %x, float 0.5)
+; CHECK-NEXT: [[SQRTF:%[a-z0-9]+]] = call float @sqrtf(float %x) nounwind readonly
+; CHECK-NEXT: [[FABSF:%[a-z0-9]+]] = call float @fabsf(float [[SQRTF]]) nounwind readonly
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq float %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], float 0x7FF0000000000000, float [[FABSF]]
+ ret float %retval
+; CHECK-NEXT: ret float [[SELECT]]
+}
+
+define double @test_simplify8(double %x) {
+; CHECK: @test_simplify8
+ %retval = call double @pow(double %x, double 0.5)
+; CHECK-NEXT: [[SQRT:%[a-z0-9]+]] = call double @sqrt(double %x) nounwind readonly
+; CHECK-NEXT: [[FABS:%[a-z0-9]+]] = call double @fabs(double [[SQRT]]) nounwind readonly
+; CHECK-NEXT: [[FCMP:%[a-z0-9]+]] = fcmp oeq double %x, 0xFFF0000000000000
+; CHECK-NEXT: [[SELECT:%[a-z0-9]+]] = select i1 [[FCMP]], double 0x7FF0000000000000, double [[FABS]]
+ ret double %retval
+; CHECK-NEXT: ret double [[SELECT]]
+}
+
+; Check pow(-infinity, 0.5) -> +infinity.
+
+define float @test_simplify9(float %x) {
+; CHECK: @test_simplify9
+ %retval = call float @powf(float 0xFFF0000000000000, float 0.5)
+ ret float %retval
+; CHECK-NEXT: ret float 0x7FF0000000000000
+}
+
+define double @test_simplify10(double %x) {
+; CHECK: @test_simplify10
+ %retval = call double @pow(double 0xFFF0000000000000, double 0.5)
+ ret double %retval
+; CHECK-NEXT: ret double 0x7FF0000000000000
+}
+
+; Check pow(x, 1.0) -> x.
+
+define float @test_simplify11(float %x) {
+; CHECK: @test_simplify11
+ %retval = call float @powf(float %x, float 1.0)
+ ret float %retval
+; CHECK-NEXT: ret float %x
+}
+
+define double @test_simplify12(double %x) {
+; CHECK: @test_simplify12
+ %retval = call double @pow(double %x, double 1.0)
+ ret double %retval
+; CHECK-NEXT: ret double %x
+}
+
+; Check pow(x, 2.0) -> x*x.
+
+define float @test_simplify13(float %x) {
+; CHECK: @test_simplify13
+ %retval = call float @powf(float %x, float 2.0)
+; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul float %x, %x
+ ret float %retval
+; CHECK-NEXT: ret float [[SQUARE]]
+}
+
+define double @test_simplify14(double %x) {
+; CHECK: @test_simplify14
+ %retval = call double @pow(double %x, double 2.0)
+; CHECK-NEXT: [[SQUARE:%[a-z0-9]+]] = fmul double %x, %x
+ ret double %retval
+; CHECK-NEXT: ret double [[SQUARE]]
+}
+
+; Check pow(x, -1.0) -> 1.0/x.
+
+define float @test_simplify15(float %x) {
+; CHECK: @test_simplify15
+ %retval = call float @powf(float %x, float -1.0)
+; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv float 1.000000e+00, %x
+ ret float %retval
+; CHECK-NEXT: ret float [[RECIPROCAL]]
+}
+
+define double @test_simplify16(double %x) {
+; CHECK: @test_simplify16
+ %retval = call double @pow(double %x, double -1.0)
+; CHECK-NEXT: [[RECIPROCAL:%[a-z0-9]+]] = fdiv double 1.000000e+00, %x
+ ret double %retval
+; CHECK-NEXT: ret double [[RECIPROCAL]]
+}
diff --git a/test/Transforms/InstCombine/pow-2.ll b/test/Transforms/InstCombine/pow-2.ll
new file mode 100644
index 0000000000..af64cda090
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-2.ll
@@ -0,0 +1,14 @@
+; Test that the pow library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare float @pow(double, double)
+
+; Check that pow functions with the wrong prototype aren't simplified.
+
+define float @test_no_simplify1(double %x) {
+; CHECK: @test_no_simplify1
+ %retval = call float @pow(double 1.0, double %x)
+; CHECK-NEXT: call float @pow(double 1.000000e+00, double %x)
+ ret float %retval
+}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index 2b5c8f8a74..d34600f0fa 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -1,24 +1,24 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define void @entry() nounwind {
-entry:
- br label %for.cond
-
-for.cond:
+define void @entry() nounwind {
+entry:
+ br label %for.cond
+
+for.cond:
%local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
- %phi3 = sub <1 x i32> zeroinitializer, %local
- br label %cond.end
-
-cond.false:
- br label %cond.end
-
-cond.end:
- %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
- br label %cond.end47
-
-cond.end47:
- %sum = add <1 x i32> %cond, <i32 92>
- %phi2 = sub <1 x i32> zeroinitializer, %sum
- br label %for.cond
-}
+; CHECK: sub <1 x i32> <i32 92>, %local
+ %phi3 = sub <1 x i32> zeroinitializer, %local
+ br label %cond.end
+
+cond.false:
+ br label %cond.end
+
+cond.end:
+ %cond = phi <1 x i32> [ %phi3, %for.cond ], [ undef, %cond.false ]
+ br label %cond.end47
+
+cond.end47:
+ %sum = add <1 x i32> %cond, <i32 92>
+ %phi2 = sub <1 x i32> zeroinitializer, %sum
+ br label %for.cond
+}
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
new file mode 100644
index 0000000000..3a910ea437
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -0,0 +1,119 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent = constant [2 x i8] c"%\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+@empty = constant [1 x i8] c"\00"
+; CHECK: [[STR:@[a-z0-9]+]] = private unnamed_addr constant [12 x i8] c"hello world\00"
+
+declare i32 @printf(i8*, ...)
+
+; Check printf("") -> noop.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %fmt = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("x") -> putchar('x'), even for '%'.
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %fmt = getelementptr [2 x i8]* @percent, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 37)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("foo\n") -> puts("foo").
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* [[STR]], i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%c", chr) -> putchar(chr).
+
+define void @test_simplify5() {
+; CHECK: @test_simplify5
+ %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf("%s\n", str) -> puts(str).
+
+define void @test_simplify6() {
+; CHECK: @test_simplify6
+ %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
+ %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+ ret void
+; CHECK-NEXT: ret void
+}
+
+; Check printf(format, ...) -> iprintf(format, ...) if no floating point.
+
+define void @test_simplify7() {
+; CHECK-IPRINTF: @test_simplify7
+ %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @iprintf(i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+ ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1() {
+; CHECK-IPRINTF: @test_no_simplify1
+ %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+ call i32 (i8*, ...)* @printf(i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+ ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(i8* %fmt, double %d) {
+; CHECK: @test_no_simplify2
+ call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* %fmt, double %d)
+ ret void
+; CHECK-NEXT: ret void
+}
+
+define i32 @test_no_simplify3() {
+; CHECK: @test_no_simplify3
+ %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+ %ret = call i32 (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @h, i32 0, i32 0))
+ ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/test/Transforms/InstCombine/sdiv-1.ll b/test/Transforms/InstCombine/sdiv-1.ll
index c46b5eaef4..6ab18ac7f8 100644
--- a/test/Transforms/InstCombine/sdiv-1.ll
+++ b/test/Transforms/InstCombine/sdiv-1.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -instcombine -inline -S | not grep '-715827882'
+; RUN: opt < %s -instcombine -inline -S | FileCheck %s
; PR3142
+; CHECK-NOT: -715827882
+
define i32 @a(i32 %X) nounwind readnone {
entry:
%0 = sub i32 0, %X
diff --git a/test/Transforms/InstCombine/toascii-1.ll b/test/Transforms/InstCombine/toascii-1.ll
new file mode 100644
index 0000000000..c4a13e2293
--- /dev/null
+++ b/test/Transforms/InstCombine/toascii-1.ll
@@ -0,0 +1,59 @@
+; Test that the toascii library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @toascii(i32)
+
+; Check toascii(c) -> c & 0x7f.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %ret = call i32 @toascii(i32 0)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %ret = call i32 @toascii(i32 1)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 1
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+ %ret = call i32 @toascii(i32 127)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %ret = call i32 @toascii(i32 128)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+ %ret = call i32 @toascii(i32 255)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 127
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+ %ret = call i32 @toascii(i32 256)
+ ret i32 %ret
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify7(i32 %x) {
+; CHECK: @test_simplify7
+ %ret = call i32 @toascii(i32 %x)
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 127
+ ret i32 %ret
+; CHECK-NEXT: ret i32 [[AND]]
+}
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
index 6523622995..f4c75c8009 100644
--- a/test/Transforms/InstCombine/vector_gep1.ll
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -35,3 +35,8 @@ define <2 x i1> @test5(<2 x i8*> %a) {
%B = icmp ult <2 x i8*> %g, zeroinitializer
ret <2 x i1> %B
}
+
+define <2 x i32*> @test7(<2 x {i32, i32}*> %a) {
+ %w = getelementptr <2 x {i32, i32}*> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
+ ret <2 x i32*> %w
+}
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 89f00bd684..3c99246796 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -51,3 +51,18 @@ define i32 @test4(i32 %A, i32 %B) {
; CHECK: %1 = ashr i32 %A, %B
; CHECK: ret i32 %1
}
+
+; defect-2 in rdar://12329730
+; ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
+; where the "X" has more than one use
+define i32 @test5(i32 %val1) {
+test5:
+ %xor = xor i32 %val1, 1234
+ %shr = lshr i32 %xor, 8
+ %xor1 = xor i32 %shr, 1
+ %add = add i32 %xor1, %xor
+ ret i32 %add
+; CHECK: @test5
+; CHECK: lshr i32 %val1, 8
+; CHECK: ret
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index ced74bd4be..ce2bb799c8 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -266,6 +266,15 @@ define i1 @add5(i32 %x, i32 %y) {
; CHECK: ret i1 true
}
+define i1 @add6(i64 %A, i64 %B) {
+; CHECK: @add6
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %B, %A
+ %cmp = icmp eq i64 %s1, %s2
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
+
define i1 @addpowtwo(i32 %x, i32 %y) {
; CHECK: @addpowtwo
%l = lshr i32 %x, 1
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index fce29d2404..c34fd72a46 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -391,9 +391,9 @@ define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %o
ret void
}
-; Can't vectorize because of reductions.
+; Can vectorize.
;CHECK: @example14
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
;CHECK: ret void
define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
.preheader3:
@@ -565,9 +565,8 @@ define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
ret i32 %a.0.lcssa
}
-; Can't vectorize because there are multiple PHIs.
;CHECK: @example23
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
;CHECK: ret void
define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
br label %1
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
new file mode 100644
index 0000000000..516fd1de07
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+; int __attribute__((noinline)) sum_array(int *A, int n) {
+; return std::accumulate(A, A + n, 0);
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @sum_array
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: ret i32
+define i32 @sum_array(i32* %A, i32 %n) nounwind uwtable readonly noinline ssp {
+ %1 = sext i32 %n to i64
+ %2 = getelementptr inbounds i32* %A, i64 %1
+ %3 = icmp eq i32 %n, 0
+ br i1 %3, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+.lr.ph.i: ; preds = %0, %.lr.ph.i
+ %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
+ %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
+ %4 = load i32* %.03.i, align 4
+ %5 = add nsw i32 %4, %.012.i
+ %6 = getelementptr inbounds i32* %.03.i, i64 1
+ %7 = icmp eq i32* %6, %2
+ br i1 %7, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %.lr.ph.i
+
+_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %.lr.ph.i, %0
+ %.01.lcssa.i = phi i32 [ 0, %0 ], [ %5, %.lr.ph.i ]
+ ret i32 %.01.lcssa.i
+}
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index ce586e15fb..e29b5dc9c0 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -144,3 +144,31 @@ define i32 @sozefx_(i32 %x, i32 %y) {
%t6 = add i32 %t4, %t5
ret i32 %t6
}
+
+define i32 @bar(i32 %arg, i32 %arg1, i32 %arg2) {
+ %tmp1 = mul i32 %arg1, 2
+ %tmp2 = mul i32 %tmp1, 3
+ %tmp3 = mul i32 %arg2, 2
+ %tmp4 = add i32 %tmp1, 1 ; dead code
+ %ret = add i32 %tmp2, %tmp3
+ ret i32 %ret
+}
+
+; PR14060
+define i8 @hang(i8 %p, i8 %p0, i8 %p1, i8 %p2, i8 %p3, i8 %p4, i8 %p5, i8 %p6, i8 %p7, i8 %p8, i8 %p9) {
+ %tmp = zext i1 false to i8
+ %tmp16 = or i8 %tmp, 1
+ %tmp22 = or i8 %p7, %p0
+ %tmp23 = or i8 %tmp16, %tmp22
+ %tmp28 = or i8 %p9, %p1
+ %tmp31 = or i8 %tmp23, %p2
+ %tmp32 = or i8 %tmp31, %tmp28
+ %tmp38 = or i8 %p8, %p3
+ %tmp39 = or i8 %tmp16, %tmp38
+ %tmp43 = or i8 %tmp39, %p4
+ %tmp44 = or i8 %tmp43, 1
+ %tmp47 = or i8 %tmp32, %p5
+ %tmp50 = or i8 %tmp47, %p6
+ %tmp51 = or i8 %tmp44, %tmp50
+ ret i8 %tmp51
+}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 110950f76a..b363eefb3f 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1100,12 +1100,12 @@ entry:
%imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
store float %phi.real, float* %real
store float %phi.imag, float* %imag
+ ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
- ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index d95e48f303..921016a9c2 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -390,3 +390,38 @@ if.then:
%tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0
br label %for.cond
}
+
+define i64 @PR14132(i1 %flag) {
+; CHECK: @PR14132
+; Here we form a PHI-node by promoting the pointer alloca first, and then in
+; order to promote the other two allocas, we speculate the load of the
+; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca, which is completely bogus. However, we were asserting on trying to
+; rewrite it. Now it is replaced with undef. Eventually we may replace it with
+; unreachable and even the CFG will go away here.
+entry:
+ %a = alloca i64
+ %b = alloca i8
+ %ptr = alloca i64*
+; CHECK-NOT: alloca
+
+ %ptr.cast = bitcast i64** %ptr to i8**
+ store i64 0, i64* %a
+ store i8 1, i8* %b
+ store i64* %a, i64** %ptr
+ br i1 %flag, label %if.then, label %if.end
+
+if.then:
+ store i8* %b, i8** %ptr.cast
+ br label %if.end
+
+if.end:
+ %tmp = load i64** %ptr
+ %result = load i64* %tmp
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ]
+
+ ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 02e084bf11..f1e118955d 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -36,15 +36,15 @@ entry:
define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
; CHECK: @test2
-; FIXME: This should be handled!
entry:
%a = alloca [2 x <4 x i32>]
-; CHECK: alloca <4 x i32>
+; CHECK-NOT: alloca
%a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
store <4 x i32> %x, <4 x i32>* %a.x
%a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
%a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
%tmp1 = load i32* %a.tmp1
@@ -54,10 +54,18 @@ entry:
%a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
%tmp3.vec = load <2 x i32>* %a.tmp3.cast
%tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
+; CHECK-NOT: load
+; CHECK: %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0
%tmp4 = add i32 %tmp1, %tmp2
%tmp5 = add i32 %tmp3, %tmp4
ret i32 %tmp5
+; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]]
+; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]]
+; CHECK-NEXT: ret i32 %[[sum2]]
}
define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
@@ -206,6 +214,71 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
ret i64 %res
}
+define <4 x i32> @test_subvec_store() {
+; CHECK: @test_subvec_store
+entry:
+ %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+
+ %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
+ %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+ store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
+; CHECK-NOT: store
+; CHECK: %[[insert1:.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+
+ %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
+ %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+ store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>, <4 x i32> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+
+ %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
+ %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+ store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x i32> <i32 undef, i32 undef, i32 2, i32 2>, <4 x i32> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+
+ %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3
+ store i32 3, i32* %a.gep3
+; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x i32> %[[insert3]], i32 3, i32 3
+
+ %ret = load <4 x i32>* %a
+
+ ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[insert4]]
+}
+
+define <4 x i32> @test_subvec_load() {
+; CHECK: @test_subvec_load
+entry:
+ %a = alloca <4 x i32>
+; CHECK-NOT: alloca
+ store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a
+; CHECK-NOT: store
+
+ %a.gep0 = getelementptr <4 x i32>* %a, i32 0, i32 0
+ %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
+ %first = load <2 x i32>* %a.cast0
+; CHECK-NOT: load
+; CHECK: %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+
+ %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
+ %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
+ %second = load <2 x i32>* %a.cast1
+; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
+
+ %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
+ %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
+ %third = load <2 x i32>* %a.cast2
+; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+
+ %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
+ %ret = shufflevector <2 x i32> %tmp, <2 x i32> %third, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+ ret <4 x i32> %ret
+; CHECK-NEXT: ret <4 x i32> %[[ret]]
+}
+
define i32 @PR14212() {
; CHECK: @PR14212
; This caused a crash when "splitting" the load of the i32 in order to promote
@@ -220,3 +293,48 @@ entry:
ret i32 %load
; CHECK: ret i32
}
+
+define <2 x i8> @PR14349.1(i32 %x) {
+; CHECK: @PR14349.1
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split store of an integer to a store of a vector.
+entry:
+ %a = alloca i32
+; CHECK-NOT: alloca
+
+ store i32 %x, i32* %a
+; CHECK-NOT: store
+
+ %cast = bitcast i32* %a to <2 x i8>*
+ %vec = load <2 x i8>* %cast
+; CHECK-NOT: load
+
+ ret <2 x i8> %vec
+; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
+; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
+; CHECK: ret <2 x i8> %[[cast]]
+}
+
+define i32 @PR14349.2(<2 x i8> %x) {
+; CHECK: @PR14349.2
+; The second testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split load of an integer to a load of a vector.
+entry:
+ %a = alloca i32
+; CHECK-NOT: alloca
+
+ %cast = bitcast i32* %a to <2 x i8>*
+ store <2 x i8> %x, <2 x i8>* %cast
+; CHECK-NOT: store
+
+ %int = load i32* %a
+; CHECK-NOT: load
+
+ ret i32 %int
+; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
+; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
+; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
+; CHECK: ret i32 %[[insert]]
+}
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
deleted file mode 100644
index 6aecbeacd7..0000000000
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; Test that FFSOpt works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; CHECK-NOT: call{{.*}}@ffs
-
-@non_const = external global i32 ; <i32*> [#uses=1]
-
-declare i32 @ffs(i32)
-
-declare i32 @ffsl(i32)
-
-declare i32 @ffsll(i64)
-
-define i32 @main() {
- %arg = load i32* @non_const ; <i32> [#uses=1]
- %val0 = call i32 @ffs( i32 %arg ) ; <i32> [#uses=1]
- %val1 = call i32 @ffs( i32 1 ) ; <i32> [#uses=1]
- %val2 = call i32 @ffs( i32 2048 ) ; <i32> [#uses=1]
- %val3 = call i32 @ffsl( i32 65536 ) ; <i32> [#uses=1]
- %val4 = call i32 @ffsll( i64 1024 ) ; <i32> [#uses=1]
- %val5 = call i32 @ffsll( i64 17179869184 ) ; <i32> [#uses=1]
- %val6 = call i32 @ffsll( i64 1152921504606846976 ) ; <i32> [#uses=1]
- %rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
- %rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
- %rslt3 = add i32 %val5, %val6 ; <i32> [#uses=1]
- %rslt4 = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
- %rslt5 = add i32 %rslt4, %rslt3 ; <i32> [#uses=2]
- %rslt6 = add i32 %rslt5, %val0 ; <i32> [#uses=0]
- ret i32 %rslt5
-}
-
-
-; PR4206
-define i32 @a(i64) nounwind {
- %2 = call i32 @ffsll(i64 %0) ; <i32> [#uses=1]
- ret i32 %2
-}
-
-; PR13028
-define i32 @b() nounwind {
- %ffs = call i32 @ffsll(i64 0)
- ret i32 %ffs
-; CHECK: @b
-; CHECK-NEXT: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/IsDigit.ll b/test/Transforms/SimplifyLibCalls/IsDigit.ll
deleted file mode 100644
index 51a769d9bb..0000000000
--- a/test/Transforms/SimplifyLibCalls/IsDigit.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that the IsDigitOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep call
-
-declare i32 @isdigit(i32)
-
-declare i32 @isascii(i32)
-
-define i32 @main() {
- %val1 = call i32 @isdigit( i32 47 ) ; <i32> [#uses=1]
- %val2 = call i32 @isdigit( i32 48 ) ; <i32> [#uses=1]
- %val3 = call i32 @isdigit( i32 57 ) ; <i32> [#uses=1]
- %val4 = call i32 @isdigit( i32 58 ) ; <i32> [#uses=1]
- %rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
- %rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
- %sum = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
- %rslt = call i32 @isdigit( i32 %sum ) ; <i32> [#uses=1]
- %tmp = call i32 @isascii( i32 %rslt ) ; <i32> [#uses=1]
- ret i32 %tmp
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
deleted file mode 100644
index 489c993f21..0000000000
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-@str = internal constant [13 x i8] c"hello world\0A\00" ; <[13 x i8]*> [#uses=1]
-@str1 = internal constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
-
-; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
-
-declare i32 @printf(i8*, ...)
-
-; CHECK: define void @f0
-; CHECK-NOT: printf
-; CHECK: }
-define void @f0() {
-entry:
- %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
- ret void
-}
-
-; CHECK: define void @f1
-; CHECK-NOT: printf
-; CHECK: }
-define void @f1() {
-entry:
- %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([2 x i8]* @str1, i32 0, i32 0) ) ; <i32> [#uses=0]
- ret void
-}
-
-; Verify that we don't turn this into a putchar call (thus changing the return
-; value).
-;
-; CHECK: define i32 @f2
-; CHECK: printf
-; CHECK: }
-define i32 @f2() {
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([2 x i8]* @str1, i32 0, i32 0))
- ret i32 %call
-}
diff --git a/test/Transforms/SimplifyLibCalls/Puts.ll b/test/Transforms/SimplifyLibCalls/Puts.ll
index 48431434cc..aa68904810 100644
--- a/test/Transforms/SimplifyLibCalls/Puts.ll
+++ b/test/Transforms/SimplifyLibCalls/Puts.ll
@@ -1,7 +1,7 @@
; Test that the PutsOptimizer works correctly
; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-target datalayout = "-p:64:64:64"
+target datalayout = "p:64:64:64"
@.str = private constant [1 x i8] zeroinitializer
diff --git a/test/Transforms/SimplifyLibCalls/ToAscii.ll b/test/Transforms/SimplifyLibCalls/ToAscii.ll
deleted file mode 100644
index aef47333b3..0000000000
--- a/test/Transforms/SimplifyLibCalls/ToAscii.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that the ToAsciiOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*toascii"
-
-declare i32 @toascii(i32)
-
-define i32 @main() {
- %val1 = call i32 @toascii( i32 1 ) ; <i32> [#uses=1]
- %val2 = call i32 @toascii( i32 0 ) ; <i32> [#uses=1]
- %val3 = call i32 @toascii( i32 127 ) ; <i32> [#uses=1]
- %val4 = call i32 @toascii( i32 128 ) ; <i32> [#uses=1]
- %val5 = call i32 @toascii( i32 255 ) ; <i32> [#uses=1]
- %val6 = call i32 @toascii( i32 256 ) ; <i32> [#uses=1]
- %rslt1 = add i32 %val1, %val2 ; <i32> [#uses=1]
- %rslt2 = add i32 %val3, %val4 ; <i32> [#uses=1]
- %rslt3 = add i32 %val5, %val6 ; <i32> [#uses=1]
- %rslt4 = add i32 %rslt1, %rslt2 ; <i32> [#uses=1]
- %rslt5 = add i32 %rslt4, %rslt3 ; <i32> [#uses=1]
- ret i32 %rslt5
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/abs.ll b/test/Transforms/SimplifyLibCalls/abs.ll
deleted file mode 100644
index 3934a5b98f..0000000000
--- a/test/Transforms/SimplifyLibCalls/abs.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "select i1 %ispos"
-; PR2337
-
-define i32 @test(i32 %x) {
-entry:
- %call = call i32 @abs( i32 %x ) ; <i32> [#uses=1]
- ret i32 %call
-}
-
-declare i32 @abs(i32)
-
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
deleted file mode 100644
index 6a8ce8c388..0000000000
--- a/test/Transforms/SimplifyLibCalls/cos.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define double @foo(double %d) nounwind readnone {
-; CHECK: @foo
- %1 = fsub double -0.000000e+00, %d
- %2 = call double @cos(double %1) nounwind readnone
-; CHECK: call double @cos(double %d)
- ret double %2
-}
-
-declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/exp2.ll b/test/Transforms/SimplifyLibCalls/exp2.ll
deleted file mode 100644
index a5927757cf..0000000000
--- a/test/Transforms/SimplifyLibCalls/exp2.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "call.*ldexp" | count 4
-; rdar://5852514
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-define double @t1(i32 %x) nounwind {
-entry:
- %tmp12 = sitofp i32 %x to double ; <double> [#uses=1]
- %exp2 = tail call double @exp2( double %tmp12 ) ; <double> [#uses=1]
- ret double %exp2
-}
-
-define float @t4(i8 zeroext %x) nounwind {
-entry:
- %tmp12 = uitofp i8 %x to float ; <float> [#uses=1]
- %tmp3 = tail call float @exp2f( float %tmp12 ) nounwind readonly ; <float> [#uses=1]
- ret float %tmp3
-}
-
-declare float @exp2f(float) nounwind readonly
-
-define double @t3(i16 zeroext %x) nounwind {
-entry:
- %tmp12 = uitofp i16 %x to double ; <double> [#uses=1]
- %exp2 = tail call double @exp2( double %tmp12 ) ; <double> [#uses=1]
- ret double %exp2
-}
-
-define double @t2(i16 signext %x) nounwind {
-entry:
- %tmp12 = sitofp i16 %x to double ; <double> [#uses=1]
- %exp2 = tail call double @exp2( double %tmp12 ) ; <double> [#uses=1]
- ret double %exp2
-}
-
-declare double @exp2(double)
-
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
deleted file mode 100644
index 93c62c2002..0000000000
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -simplify-libcalls -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
-
-; DO-SIMPLIFY: call float @floorf(
-; DO-SIMPLIFY: call float @ceilf(
-; DO-SIMPLIFY: call float @roundf(
-; DO-SIMPLIFY: call float @nearbyintf(
-; DO-SIMPLIFY: call float @truncf(
-; DO-SIMPLIFY: call float @fabsf(
-
-; C89-SIMPLIFY: call float @floorf(
-; C89-SIMPLIFY: call float @ceilf(
-; C89-SIMPLIFY: call double @round(
-; C89-SIMPLIFY: call double @nearbyint(
-
-; DONT-SIMPLIFY: call double @floor(
-; DONT-SIMPLIFY: call double @ceil(
-; DONT-SIMPLIFY: call double @round(
-; DONT-SIMPLIFY: call double @nearbyint(
-; DONT-SIMPLIFY: call double @trunc(
-; DONT-SIMPLIFY: call double @fabs(
-
-declare double @floor(double)
-
-declare double @ceil(double)
-
-declare double @round(double)
-
-declare double @nearbyint(double)
-
-declare double @trunc(double)
-
-declare double @fabs(double)
-
-define float @test_floor(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> floorf
- %E = call double @floor( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_ceil(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> ceilf
- %E = call double @ceil( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_round(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> roundf
- %E = call double @round( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_nearbyint(float %C) {
- %D = fpext float %C to double ; <double> [#uses=1]
- ; --> nearbyintf
- %E = call double @nearbyint( double %D ) ; <double> [#uses=1]
- %F = fptrunc double %E to float ; <float> [#uses=1]
- ret float %F
-}
-
-define float @test_trunc(float %C) {
- %D = fpext float %C to double
- ; --> truncf
- %E = call double @trunc(double %D)
- %F = fptrunc double %E to float
- ret float %F
-}
-
-define float @test_fabs(float %C) {
- %D = fpext float %C to double
- ; --> fabsf
- %E = call double @fabs(double %D)
- %F = fptrunc double %E to float
- ret float %F
-}
diff --git a/test/Transforms/SimplifyLibCalls/iprintf.ll b/test/Transforms/SimplifyLibCalls/iprintf.ll
index 7f036fe3ab..d6a7074db1 100644
--- a/test/Transforms/SimplifyLibCalls/iprintf.ll
+++ b/test/Transforms/SimplifyLibCalls/iprintf.ll
@@ -6,26 +6,6 @@ target triple = "xcore-xmos-elf"
@.str = internal constant [4 x i8] c"%f\0A\00" ; <[4 x i8]*> [#uses=1]
@.str1 = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
-; Verify printf with no floating point arguments is transformed to iprintf
-define i32 @f0(i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f0
-; CHECK: @iprintf
-; CHECK: }
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x) ; <i32> [#uses=0]
- ret i32 %0
-}
-
-; Verify we don't turn this into an iprintf call
-define void @f1(double %x) nounwind {
-entry:
-; CHECK: define void @f1
-; CHECK: @printf
-; CHECK: }
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x) nounwind ; <i32> [#uses=0]
- ret void
-}
-
; Verify sprintf with no floating point arguments is transformed to siprintf
define i32 @f2(i8* %p, i32 %x) nounwind {
entry:
@@ -66,6 +46,5 @@ entry:
ret i32 %0
}
-declare i32 @printf(i8* nocapture, ...) nounwind
declare i32 @sprintf(i8* nocapture, i8* nocapture, ...) nounwind
declare i32 @fprintf(i8* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll b/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
deleted file mode 100644
index 0480fdda89..0000000000
--- a/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; rdar://7251832
-
-; SimplifyLibcalls should optimize pow(x, 0.5) to sqrt plus code to handle
-; special cases. The readonly attribute on the call should be preserved.
-
-; CHECK: define float @foo(float %x) nounwind {
-; CHECK: %sqrtf = call float @sqrtf(float %x) nounwind readonly
-; CHECK: %fabsf = call float @fabsf(float %sqrtf) nounwind readonly
-; CHECK: %1 = fcmp oeq float %x, 0xFFF0000000000000
-; CHECK: %retval = select i1 %1, float 0x7FF0000000000000, float %fabsf
-; CHECK: ret float %retval
-
-define float @foo(float %x) nounwind {
- %retval = call float @powf(float %x, float 0.5)
- ret float %retval
-}
-
-; CHECK: define double @doo(double %x) nounwind {
-; CHECK: %sqrt = call double @sqrt(double %x) nounwind readonly
-; CHECK: %fabs = call double @fabs(double %sqrt) nounwind readonly
-; CHECK: %1 = fcmp oeq double %x, 0xFFF0000000000000
-; CHECK: %retval = select i1 %1, double 0x7FF0000000000000, double %fabs
-; CHECK: ret double %retval
-; CHECK: }
-
-define double @doo(double %x) nounwind {
- %retval = call double @pow(double %x, double 0.5)
- ret double %retval
-}
-
-declare float @powf(float, float) nounwind readonly
-declare double @pow(double, double) nounwind readonly
diff --git a/test/Transforms/SimplifyLibCalls/pow2.ll b/test/Transforms/SimplifyLibCalls/pow2.ll
deleted file mode 100644
index f0964e7d6d..0000000000
--- a/test/Transforms/SimplifyLibCalls/pow2.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Testcase for calls to the standard C "pow" function
-;
-; RUN: opt < %s -simplify-libcalls -S | not grep "call .pow"
-
-
-declare double @pow(double, double)
-declare float @powf(float, float)
-
-define double @test1(double %X) {
- %Y = call double @pow( double %X, double 0.000000e+00 ) ; <double> [#uses=1]
- ret double %Y
-}
-
-define double @test2(double %X) {
- %Y = call double @pow( double %X, double -0.000000e+00 ) ; <double> [#uses=1]
- ret double %Y
-}
-
-define double @test3(double %X) {
- %Y = call double @pow( double 1.000000e+00, double %X ) ; <double> [#uses=1]
- ret double %Y
-}
-
-define double @test4(double %X) {
- %Y = call double @pow( double %X, double 2.0)
- ret double %Y
-}
-
-define float @test4f(float %X) {
- %Y = call float @powf( float %X, float 2.0)
- ret float %Y
-}
-
-define float @test5f(float %X) {
- %Y = call float @powf(float 2.0, float %X) ;; exp2
- ret float %Y
-}
diff --git a/test/lit.cfg b/test/lit.cfg
index 79eaa23c8b..5a4cceda0e 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -197,7 +197,7 @@ for pattern in [r"\bbugpoint\b(?!-)", r"(?<!/|-)\bclang\b(?!-)",
r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
r"\bllvm-cov\b", r"\bllvm-diff\b",
r"\bllvm-dis\b", r"\bllvm-dwarfdump\b",
- r"\bllvm-extract\b",
+ r"\bllvm-extract\b", r"\bllvm-jistlistener\b",
r"\bllvm-link\b", r"\bllvm-mc\b",
r"\bllvm-nm\b", r"\bllvm-objdump\b",
r"\bllvm-prof\b", r"\bllvm-ranlib\b",
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 2bbe63e634..7a328f0f28 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -16,6 +16,7 @@ config.targets_to_build = "@TARGETS_TO_BUILD@"
config.llvm_bindings = "@LLVM_BINDINGS@"
config.host_os = "@HOST_OS@"
config.host_arch = "@HOST_ARCH@"
+config.llvm_use_intel_jitevents = "@LLVM_USE_INTEL_JITEVENTS@"
# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
diff --git a/test/tools/llvm-objdump/disassembly-show-raw.s b/test/tools/llvm-objdump/disassembly-show-raw.s
new file mode 100644
index 0000000000..32fcad4a36
--- /dev/null
+++ b/test/tools/llvm-objdump/disassembly-show-raw.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d - \
+// RUN: | FileCheck %s -check-prefix=WITHRAW
+// RUN: llvm-mc -filetype=obj -arch=x86 %s | llvm-objdump -d -no-show-raw-insn - \
+// RUN: | FileCheck %s -check-prefix=NORAW
+
+// Expect to find the raw encoding when run with raw output (default), but not
+// when run explicitly with -no-show-raw-insn
+
+movl 0, %eax
+// WITHRAW: a1 00 00 00 00 movl
+
+// NORAW: movl
+// NORAW-NOT: a1 00
+
+
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/tools/llvm-objdump/lit.local.cfg
index ea00867701..56bf008595 100644
--- a/test/CodeGen/CellSPU/lit.local.cfg
+++ b/test/tools/llvm-objdump/lit.local.cfg
@@ -1,6 +1,6 @@
-config.suffixes = ['.ll', '.c', '.cpp']
+config.suffixes = ['.ll', '.s']
targets = set(config.root.targets_to_build.split())
-if not 'CellSPU' in targets:
+if not 'X86' in targets:
config.unsupported = True
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 6918285622..144e8ec3ea 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -31,6 +31,9 @@ add_subdirectory(llvm-objdump)
add_subdirectory(llvm-readobj)
add_subdirectory(llvm-rtdyld)
add_subdirectory(llvm-dwarfdump)
+if( LLVM_USE_INTEL_JITEVENTS )
+ add_subdirectory(llvm-jitlistener)
+endif( LLVM_USE_INTEL_JITEVENTS )
add_subdirectory(bugpoint)
add_subdirectory(bugpoint-passes)
@@ -38,6 +41,8 @@ add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stress)
add_subdirectory(llvm-mcmarkup)
+add_subdirectory(llvm-symbolizer)
+
if( NOT WIN32 )
add_subdirectory(lto)
endif()
diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt
index 64164792a7..25aa177b35 100644
--- a/tools/LLVMBuild.txt
+++ b/tools/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
+subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-jitlistener llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
[component_0]
type = Group
diff --git a/tools/Makefile b/tools/Makefile
index 17e8380677..69f42d9495 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -33,8 +33,9 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
lli llvm-extract llvm-mc \
bugpoint llvm-bcanalyzer \
llvm-diff macho-dump llvm-objdump llvm-readobj \
- llvm-rtdyld llvm-dwarfdump llvm-cov \
- llvm-size llvm-stress llvm-mcmarkup bc-wrap pso-stub
+ llvm-rtdyld llvm-dwarfdump llvm-cov llvm-jitlistener \
+ llvm-size llvm-stress llvm-mcmarkup bc-wrap pso-stub \
+ llvm-symbolizer
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index aed16f47e0..8836eedb47 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -412,7 +412,9 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
// Verify that this is still valid.
PassManager Passes;
Passes.add(createVerifierPass());
+ Passes.doInitialization();
Passes.run(*M);
+ Passes.doFinalization();
// Try running on the hacked up program...
if (TestFn(BD, M)) {
diff --git a/tools/lli/RecordingMemoryManager.cpp b/tools/lli/RecordingMemoryManager.cpp
index 9e1cff5527..75cb978130 100644
--- a/tools/lli/RecordingMemoryManager.cpp
+++ b/tools/lli/RecordingMemoryManager.cpp
@@ -28,7 +28,8 @@ allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) {
}
uint8_t *RecordingMemoryManager::
-allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) {
+allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, bool IsReadOnly) {
// The recording memory manager is just a local copy of the remote target.
// The alignment requirement is just stored here for later use. Regular
// heap storage is sufficient here.
@@ -81,7 +82,20 @@ void RecordingMemoryManager::endExceptionTable(const Function *F, uint8_t *Table
void RecordingMemoryManager::deallocateExceptionTable(void *ET) {
llvm_unreachable("Unexpected!");
}
+
+static int jit_noop() {
+ return 0;
+}
+
void *RecordingMemoryManager::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to the
+ // callee's (e.g. tools/lli) one, which would invoke the wrong, duplicated
+ // ctors (and register the wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // to be called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
return NULL;
}
diff --git a/tools/lli/RecordingMemoryManager.h b/tools/lli/RecordingMemoryManager.h
index 1590235a79..20fd0c2e6e 100644
--- a/tools/lli/RecordingMemoryManager.h
+++ b/tools/lli/RecordingMemoryManager.h
@@ -47,10 +47,13 @@ public:
unsigned SectionID);
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
+
+ bool applyPermissions(std::string *ErrMsg) { return false; }
+
// The following obsolete JITMemoryManager calls are stubbed out for
// this model.
void setMemoryWritable();
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index d41a595de8..fa4669dec6 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -231,11 +231,13 @@ public:
unsigned SectionID);
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
+ virtual bool applyPermissions(std::string *ErrMsg) { return false; }
+
// Invalidate instruction cache for code sections. Some platforms with
// separate data cache and instruction cache require explicit cache flush,
// otherwise JIT code manipulations (like resolved relocations) will get to
@@ -301,7 +303,8 @@ public:
uint8_t *LLIMCJITMemoryManager::allocateDataSection(uintptr_t Size,
unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID,
+ bool IsReadOnly) {
if (!Alignment)
Alignment = 16;
// Ensure that enough memory is requested to allow aligning.
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index e73300a0cd..2229a3aa98 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -69,100 +69,8 @@ static void DumpInput(const StringRef &Filename) {
}
OwningPtr<ObjectFile> Obj(ObjectFile::createObjectFile(Buff.take()));
+ OwningPtr<DIContext> dictx(DIContext::getDWARFContext(Obj.get()));
- StringRef DebugInfoSection;
- RelocAddrMap RelocMap;
- StringRef DebugAbbrevSection;
- StringRef DebugLineSection;
- StringRef DebugArangesSection;
- StringRef DebugStringSection;
- StringRef DebugRangesSection;
-
- error_code ec;
- for (section_iterator i = Obj->begin_sections(),
- e = Obj->end_sections();
- i != e; i.increment(ec)) {
- StringRef name;
- i->getName(name);
- StringRef data;
- i->getContents(data);
-
- if (name.startswith("__DWARF,"))
- name = name.substr(8); // Skip "__DWARF," prefix.
- name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
- if (name == "debug_info")
- DebugInfoSection = data;
- else if (name == "debug_abbrev")
- DebugAbbrevSection = data;
- else if (name == "debug_line")
- DebugLineSection = data;
- else if (name == "debug_aranges")
- DebugArangesSection = data;
- else if (name == "debug_str")
- DebugStringSection = data;
- else if (name == "debug_ranges")
- DebugRangesSection = data;
- // Any more debug info sections go here.
- else
- continue;
-
- // TODO: For now only handle relocations for the debug_info section.
- if (name != "debug_info")
- continue;
-
- if (i->begin_relocations() != i->end_relocations()) {
- uint64_t SectionSize;
- i->getSize(SectionSize);
- for (relocation_iterator reloc_i = i->begin_relocations(),
- reloc_e = i->end_relocations();
- reloc_i != reloc_e; reloc_i.increment(ec)) {
- uint64_t Address;
- reloc_i->getAddress(Address);
- uint64_t Type;
- reloc_i->getType(Type);
-
- RelocVisitor V(Obj->getFileFormatName());
- // The section address is always 0 for debug sections.
- RelocToApply R(V.visit(Type, *reloc_i));
- if (V.error()) {
- SmallString<32> Name;
- error_code ec(reloc_i->getTypeName(Name));
- if (ec) {
- errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
- }
- errs() << "error: failed to compute relocation: "
- << Name << "\n";
- continue;
- }
-
- if (Address + R.Width > SectionSize) {
- errs() << "error: " << R.Width << "-byte relocation starting "
- << Address << " bytes into section " << name << " which is "
- << SectionSize << " bytes long.\n";
- continue;
- }
- if (R.Width > 8) {
- errs() << "error: can't handle a relocation of more than 8 bytes at "
- "a time.\n";
- continue;
- }
- DEBUG(dbgs() << "Writing " << format("%p", R.Value)
- << " at " << format("%p", Address)
- << " with width " << format("%d", R.Width)
- << "\n");
- RelocMap[Address] = std::make_pair(R.Width, R.Value);
- }
- }
- }
-
- OwningPtr<DIContext> dictx(DIContext::getDWARFContext(/*FIXME*/true,
- DebugInfoSection,
- DebugAbbrevSection,
- DebugArangesSection,
- DebugLineSection,
- DebugStringSection,
- DebugRangesSection,
- RelocMap));
if (Address == -1ULL) {
outs() << Filename
<< ":\tfile format " << Obj->getFileFormatName() << "\n\n";
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 40fd51331e..0e280c1780 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -323,7 +323,9 @@ int main(int argc, char **argv) {
else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
Passes.add(createBitcodeWriterPass(Out.os()));
+ Passes.doInitialization();
Passes.run(*M.get());
+ Passes.doFinalization();
// Declare success.
Out.keep();
diff --git a/tools/llvm-jitlistener/CMakeLists.txt b/tools/llvm-jitlistener/CMakeLists.txt
new file mode 100644
index 0000000000..57a4a0cbe1
--- /dev/null
+++ b/tools/llvm-jitlistener/CMakeLists.txt
@@ -0,0 +1,20 @@
+# This tool is excluded from the CMake build if Intel JIT events are disabled.
+
+link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
+include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} )
+
+set(LLVM_LINK_COMPONENTS
+ asmparser
+ bitreader
+ inteljitevents
+ interpreter
+ jit
+ mcjit
+ nativecodegen
+ object
+ selectiondag
+ )
+
+add_llvm_tool(llvm-jitlistener
+ llvm-jitlistener.cpp
+ )
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/tools/llvm-jitlistener/LLVMBuild.txt
index 6937e705ff..c436dd90f9 100644
--- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
+++ b/tools/llvm-jitlistener/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;===- ./tools/llvm-jitlistener/LLVMBuild.txt -------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -16,8 +16,7 @@
;===------------------------------------------------------------------------===;
[component_0]
-type = Library
-name = CellSPUInfo
-parent = CellSPU
-required_libraries = MC Support Target
-add_to_library_groups = CellSPU
+type = Tool
+name = llvm-jitlistener
+parent = Tools
+required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen Object SelectionDAG Native
diff --git a/tools/llvm-jitlistener/Makefile b/tools/llvm-jitlistener/Makefile
new file mode 100644
index 0000000000..0971e6a252
--- /dev/null
+++ b/tools/llvm-jitlistener/Makefile
@@ -0,0 +1,27 @@
+##===- tools/llvm-jitlistener/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-jitlistener
+
+include $(LEVEL)/Makefile.config
+
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag Object
+
+# If Intel JIT Events support is configured, link against the LLVM Intel JIT
+# Events interface library. If not, this tool will do nothing useful, but it
+# will build correctly.
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+ LINK_COMPONENTS += inteljitevents
+endif
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/llvm-jitlistener/llvm-jitlistener.cpp b/tools/llvm-jitlistener/llvm-jitlistener.cpp
new file mode 100644
index 0000000000..2b05e66e98
--- /dev/null
+++ b/tools/llvm-jitlistener/llvm-jitlistener.cpp
@@ -0,0 +1,207 @@
+//===-- llvm-jitlistener.cpp - Utility for testing MCJIT event listener ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is used by lit tests to verify the MCJIT JITEventListener
+// interface. It registers a mock JIT event listener, generates a module from
+// an input IR file and dumps the reported event information to stdout.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/IRReader.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+typedef std::vector<std::pair<std::string, unsigned int> > SourceLocations;
+typedef std::map<uint64_t, SourceLocations> NativeCodeMap;
+
+NativeCodeMap ReportedDebugFuncs;
+
+int NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+ switch (EventType) {
+ case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED: {
+ if (!EventSpecificData) {
+ errs() <<
+ "Error: The JIT event listener did not provide a event data.";
+ return -1;
+ }
+ iJIT_Method_Load* msg = static_cast<iJIT_Method_Load*>(EventSpecificData);
+
+ ReportedDebugFuncs[msg->method_id];
+
+ outs() << "Method load [" << msg->method_id << "]: " << msg->method_name
+ << ", Size = " << msg->method_size << "\n";
+
+ for(unsigned int i = 0; i < msg->line_number_size; ++i) {
+ if (!msg->line_number_table) {
+ errs() << "A function with a non-zero line count had no line table.";
+ return -1;
+ }
+ std::pair<std::string, unsigned int> loc(
+ std::string(msg->source_file_name),
+ msg->line_number_table[i].LineNumber);
+ ReportedDebugFuncs[msg->method_id].push_back(loc);
+ outs() << " Line info @ " << msg->line_number_table[i].Offset
+ << ": " << msg->source_file_name
+ << ", line " << msg->line_number_table[i].LineNumber << "\n";
+ }
+ outs() << "\n";
+ }
+ break;
+ case iJVM_EVENT_TYPE_METHOD_UNLOAD_START: {
+ if (!EventSpecificData) {
+ errs() <<
+ "Error: The JIT event listener did not provide a event data.";
+ return -1;
+ }
+ unsigned int UnloadId
+ = *reinterpret_cast<unsigned int*>(EventSpecificData);
+ assert(1 == ReportedDebugFuncs.erase(UnloadId));
+ outs() << "Method unload [" << UnloadId << "]\n";
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+iJIT_IsProfilingActiveFlags IsProfilingActive(void) {
+ // for testing, pretend we have an Intel Parallel Amplifier XE 2011
+ // instance attached
+ return iJIT_SAMPLING_ON;
+}
+
+unsigned int GetNewMethodID(void) {
+ static unsigned int id = 0;
+ return ++id;
+}
+
+class JitEventListenerTest {
+protected:
+ void InitEE(const std::string &IRFile) {
+ LLVMContext &Context = getGlobalContext();
+
+ // If we have a native target, initialize it to ensure it is linked in and
+ // usable by the JIT.
+ InitializeNativeTarget();
+ InitializeNativeTargetAsmPrinter();
+
+ // Parse the bitcode...
+ SMDiagnostic Err;
+ TheModule = ParseIRFile(IRFile, Err, Context);
+ if (!TheModule) {
+ errs() << Err.getMessage();
+ return;
+ }
+
+ // FIXME: This is using the default legacy JITMemoryManager because it
+ // supports poison memory. At some point, we'll need to update this to
+ // use an MCJIT-specific memory manager. It might be nice to have the
+ // poison memory option there too.
+ JITMemoryManager *MemMgr = JITMemoryManager::CreateDefaultMemManager();
+ if (!MemMgr) {
+ errs() << "Unable to create memory manager.";
+ return;
+ }
+
+ // Tell the memory manager to poison freed memory so that accessing freed
+ // memory is more easily tested.
+ MemMgr->setPoisonMemory(true);
+
+ // Override the triple to generate ELF on Windows since that's supported
+ Triple Tuple(TheModule->getTargetTriple());
+ if (Tuple.getTriple().empty())
+ Tuple.setTriple(LLVM_HOSTTRIPLE);
+
+ if (Tuple.isOSWindows() && Triple::ELF != Tuple.getEnvironment()) {
+ Tuple.setEnvironment(Triple::ELF);
+ TheModule->setTargetTriple(Tuple.getTriple());
+ }
+
+ // Compile the IR
+ std::string Error;
+ TheJIT.reset(EngineBuilder(TheModule)
+ .setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setJITMemoryManager(MemMgr)
+ .setUseMCJIT(true)
+ .create());
+ if (Error.empty() == false)
+ errs() << Error;
+ }
+
+ void DestroyEE() {
+ TheJIT.reset();
+ }
+
+ LLVMContext Context; // Global ownership
+ Module *TheModule; // Owned by ExecutionEngine.
+ JITMemoryManager *JMM; // Owned by ExecutionEngine.
+ OwningPtr<ExecutionEngine> TheJIT;
+
+public:
+ void ProcessInput(const std::string &Filename) {
+ InitEE(Filename);
+
+ llvm::OwningPtr<llvm::JITEventListener> Listener(JITEventListener::createIntelJITEventListener(
+ new IntelJITEventsWrapper(NotifyEvent, 0,
+ IsProfilingActive, 0, 0,
+ GetNewMethodID)));
+
+ TheJIT->RegisterJITEventListener(Listener.get());
+
+ TheJIT->finalizeObject();
+
+ // Destroy the JIT engine instead of unregistering to get unload events.
+ DestroyEE();
+ }
+};
+
+
+
+} // end anonymous namespace
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input IR file>"),
+ cl::Required);
+
+int main(int argc, char **argv) {
+ // Print a stack trace if we signal out.
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ cl::ParseCommandLineOptions(argc, argv, "llvm jit event listener test utility\n");
+
+ JitEventListenerTest Test;
+
+ Test.ProcessInput(InputFilename);
+
+ return 0;
+}
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index f7c3748f07..8329a41f25 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -69,6 +69,9 @@ static cl::opt<bool>
RelaxAll("mc-relax-all", cl::desc("Relax all fixups"));
static cl::opt<bool>
+DisableCFI("disable-cfi", cl::desc("Do not use .cfi_* directives"));
+
+static cl::opt<bool>
NoExecStack("mc-no-exec-stack", cl::desc("File doesn't need an exec stack"));
enum OutputFileType {
@@ -415,9 +418,10 @@ int main(int argc, char **argv) {
CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, *STI, Ctx);
MAB = TheTarget->createMCAsmBackend(TripleName, MCPU);
}
+ bool UseCFI = !DisableCFI;
Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/true,
/*useLoc*/ true,
- /*useCFI*/ true,
+ UseCFI,
/*useDwarfDirectory*/ true,
IP, CE, MAB, ShowInst));
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 0543e83f9c..27efd74264 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -113,6 +113,10 @@ namespace {
cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
cl::desc("Exclude aliases from output"));
+ cl::opt<bool> ArchiveMap("print-armap",
+ cl::desc("Print the archive map"));
+ cl::alias ArchiveMaps("s", cl::desc("Alias for --print-armap"),
+ cl::aliasopt(ArchiveMap));
bool PrintAddress = true;
bool MultipleFiles = false;
@@ -146,6 +150,8 @@ namespace {
return true;
else if (a.Address == b.Address && a.Name < b.Name)
return true;
+ else if (a.Address == b.Address && a.Name == b.Name && a.Size < b.Size)
+ return true;
else
return false;
@@ -156,12 +162,21 @@ namespace {
return true;
else if (a.Size == b.Size && a.Name < b.Name)
return true;
+ else if (a.Size == b.Size && a.Name == b.Name && a.Address < b.Address)
+ return true;
else
return false;
}
static bool CompareSymbolName(const NMSymbol &a, const NMSymbol &b) {
- return a.Name < b.Name;
+ if (a.Name < b.Name)
+ return true;
+ else if (a.Name == b.Name && a.Size < b.Size)
+ return true;
+ else if (a.Name == b.Name && a.Size == b.Size && a.Address < b.Address)
+ return true;
+ else
+ return false;
}
StringRef CurrentFilename;
@@ -346,6 +361,24 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
return;
if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
+ if (ArchiveMap) {
+ outs() << "Archive map" << "\n";
+ for (object::Archive::symbol_iterator i = a->begin_symbols(),
+ e = a->end_symbols(); i != e; ++i) {
+ object::Archive::child_iterator c;
+ StringRef symname;
+ StringRef filename;
+ if (error(i->getMember(c)))
+ return;
+ if (error(i->getName(symname)))
+ return;
+ if (error(c->getName(filename)))
+ return;
+ outs() << symname << " in " << filename << "\n";
+ }
+ outs() << "\n";
+ }
+
for (object::Archive::child_iterator i = a->begin_children(),
e = a->end_children(); i != e; ++i) {
OwningPtr<Binary> child;
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 1feea421f2..46e71ceb4d 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -309,16 +309,10 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
raw_ostream &DebugOut = nulls();
#endif
- StringRef DebugAbbrevSection, DebugInfoSection, DebugArangesSection,
- DebugLineSection, DebugStrSection;
OwningPtr<DIContext> diContext;
- OwningPtr<MachOObjectFile> DSYMObj;
- MachOObject *DbgInfoObj = MachOObj;
+ ObjectFile *DbgObj = MachOOF.get();
// Try to find debug info and set up the DIContext for it.
if (UseDbg) {
- ArrayRef<SectionRef> DebugSections = Sections;
- std::vector<SectionRef> DSYMSections;
-
// A separate DSym file path was specified, parse it as a macho file,
// get the sections and supply it to the section name parsing machinery.
if (!DSYMFile.empty()) {
@@ -327,42 +321,11 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n';
return;
}
- DSYMObj.reset(static_cast<MachOObjectFile*>(
- ObjectFile::createMachOObjectFile(Buf.take())));
- const macho::Header &Header = DSYMObj->getObject()->getHeader();
-
- std::vector<SymbolRef> Symbols;
- SmallVector<uint64_t, 8> FoundFns;
- getSectionsAndSymbols(Header, DSYMObj.get(), 0, DSYMSections, Symbols,
- FoundFns);
- DebugSections = DSYMSections;
- DbgInfoObj = DSYMObj.get()->getObject();
- }
-
- // Find the named debug info sections.
- for (unsigned SectIdx = 0; SectIdx != DebugSections.size(); SectIdx++) {
- StringRef SectName;
- if (!DebugSections[SectIdx].getName(SectName)) {
- if (SectName.equals("__DWARF,__debug_abbrev"))
- DebugSections[SectIdx].getContents(DebugAbbrevSection);
- else if (SectName.equals("__DWARF,__debug_info"))
- DebugSections[SectIdx].getContents(DebugInfoSection);
- else if (SectName.equals("__DWARF,__debug_aranges"))
- DebugSections[SectIdx].getContents(DebugArangesSection);
- else if (SectName.equals("__DWARF,__debug_line"))
- DebugSections[SectIdx].getContents(DebugLineSection);
- else if (SectName.equals("__DWARF,__debug_str"))
- DebugSections[SectIdx].getContents(DebugStrSection);
- }
+ DbgObj = ObjectFile::createMachOObjectFile(Buf.take());
}
- // Setup the DIContext.
- diContext.reset(DIContext::getDWARFContext(DbgInfoObj->isLittleEndian(),
- DebugInfoSection,
- DebugAbbrevSection,
- DebugArangesSection,
- DebugLineSection,
- DebugStrSection));
+ // Setup the DIContext
+ diContext.reset(DIContext::getDWARFContext(DbgObj));
}
FunctionMapTy FunctionMap;
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 13ea4e3295..ddfcca3938 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -100,6 +100,10 @@ MAttrs("mattr",
cl::desc("Target specific attributes"),
cl::value_desc("a1,+a2,-a3,..."));
+static cl::opt<bool>
+NoShowRawInsn("no-show-raw-insn", cl::desc("When disassembling instructions, "
+ "do not print the instruction bytes."));
+
static StringRef ToolName;
static bool error(error_code ec) {
@@ -321,8 +325,11 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
DebugOut, nulls())) {
- outs() << format("%8" PRIx64 ":\t", SectionAddr + Index);
- DumpBytes(StringRef(Bytes.data() + Index, Size));
+ outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+ if (!NoShowRawInsn) {
+ outs() << "\t";
+ DumpBytes(StringRef(Bytes.data() + Index, Size));
+ }
IP->printInst(&Inst, outs(), "");
outs() << "\n";
} else {
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 81e9503abe..940ac340e7 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -287,7 +287,9 @@ int main(int argc, char **argv) {
PassManager PassMgr;
PassMgr.add(createProfileLoaderPass(ProfileDataFile));
PassMgr.add(new ProfileInfoPrinterPass(PIL));
+ PassMgr.doInitialization();
PassMgr.run(*M);
+ PassMgr.doFinalization();
return 0;
}
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 7b5bd0388d..e06d798cd5 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -58,13 +58,15 @@ public:
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID);
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) {
return 0;
}
+ bool applyPermissions(std::string *ErrMsg) { return false; }
+
// Invalidate instruction cache for sections with execute permissions.
// Some platforms with separate data cache and instruction cache require
// explicit cache flush, otherwise JIT code manipulations (like resolved
@@ -82,7 +84,8 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID,
+ bool IsReadOnly) {
sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size, 0, 0);
DataMemory.push_back(MB);
return (uint8_t*)MB.base();
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index 8473d94731..72fdac87b4 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -713,7 +713,9 @@ int main(int argc, char **argv) {
PassManager Passes;
Passes.add(createVerifierPass());
Passes.add(createPrintModulePass(&Out->os()));
+ Passes.doInitialization();
Passes.run(*M.get());
+ Passes.doFinalization();
Out->keep();
return 0;
diff --git a/tools/llvm-symbolizer/CMakeLists.txt b/tools/llvm-symbolizer/CMakeLists.txt
new file mode 100644
index 0000000000..5e274630c8
--- /dev/null
+++ b/tools/llvm-symbolizer/CMakeLists.txt
@@ -0,0 +1,13 @@
+# FIXME: As we plan to execute llvm-symbolizer binary from compiler-rt
+# libraries, it has to be compiled for all supported targets (x86_64, i386 etc).
+# This means that we need LLVM libraries to be compiled for these
+# targets as well. Currently, there is no support for such a build strategy.
+
+set(LLVM_LINK_COMPONENTS
+ DebugInfo
+ Object
+ )
+
+add_llvm_tool(llvm-symbolizer
+ llvm-symbolizer.cpp
+ )
diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/tools/llvm-symbolizer/Makefile
index 9cb6827b43..5ac83a5813 100644
--- a/lib/Target/CellSPU/TargetInfo/Makefile
+++ b/tools/llvm-symbolizer/Makefile
@@ -1,15 +1,17 @@
-##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===##
-#
+##===- tools/llvm-symbolizer/Makefile ----------------------*- Makefile -*-===##
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
-#
+#
##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCellSPUInfo
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+LEVEL := ../..
+TOOLNAME := llvm-symbolizer
+LINK_COMPONENTS := DebugInfo Object
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
new file mode 100644
index 0000000000..f3335a3fc3
--- /dev/null
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -0,0 +1,323 @@
+//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility works much like "addr2line". It is capable of transforming
+// tuples (module name, module offset) to code locations (function name,
+// file, line number, column number). It is targeted for compiler-rt tools
+// (especially AddressSanitizer and ThreadSanitizer) that can use it
+// to symbolize stack traces in their error reports.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <string>
+
+using namespace llvm;
+using namespace object;
+
+static cl::opt<bool>
+UseSymbolTable("use-symbol-table", cl::init(true),
+ cl::desc("Prefer names in symbol table to names "
+ "in debug info"));
+
+static cl::opt<bool>
+PrintFunctions("functions", cl::init(true),
+ cl::desc("Print function names as well as line "
+ "information for a given address"));
+
+static cl::opt<bool>
+PrintInlining("inlining", cl::init(true),
+ cl::desc("Print all inlined frames for a given address"));
+
+static cl::opt<bool>
+Demangle("demangle", cl::init(true),
+ cl::desc("Demangle function names"));
+
+static StringRef ToolInvocationPath;
+
+static bool error(error_code ec) {
+ if (!ec) return false;
+ errs() << ToolInvocationPath << ": error reading file: "
+ << ec.message() << ".\n";
+ return true;
+}
+
+static uint32_t getDILineInfoSpecifierFlags() {
+ uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo |
+ llvm::DILineInfoSpecifier::AbsoluteFilePath;
+ if (PrintFunctions)
+ Flags |= llvm::DILineInfoSpecifier::FunctionName;
+ return Flags;
+}
+
+static void patchFunctionNameInDILineInfo(const std::string &NewFunctionName,
+ DILineInfo &LineInfo) {
+ std::string FileName = LineInfo.getFileName();
+ LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName),
+ LineInfo.getLine(), LineInfo.getColumn());
+}
+
+namespace {
+class ModuleInfo {
+ OwningPtr<ObjectFile> Module;
+ OwningPtr<DIContext> DebugInfoContext;
+ public:
+ ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
+ : Module(Obj), DebugInfoContext(DICtx) {}
+
+ DILineInfo symbolizeCode(uint64_t ModuleOffset) const {
+ DILineInfo LineInfo;
+ if (DebugInfoContext) {
+ LineInfo = DebugInfoContext->getLineInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifierFlags());
+ }
+ // Override function name from symbol table if necessary.
+ if (PrintFunctions && UseSymbolTable) {
+ std::string Function;
+ if (getFunctionNameFromSymbolTable(ModuleOffset, Function)) {
+ patchFunctionNameInDILineInfo(Function, LineInfo);
+ }
+ }
+ return LineInfo;
+ }
+
+ DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset) const {
+ DIInliningInfo InlinedContext;
+ if (DebugInfoContext) {
+ InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifierFlags());
+ }
+ // Make sure there is at least one frame in context.
+ if (InlinedContext.getNumberOfFrames() == 0) {
+ InlinedContext.addFrame(DILineInfo());
+ }
+ // Override the function name in lower frame with name from symbol table.
+ if (PrintFunctions && UseSymbolTable) {
+ DIInliningInfo PatchedInlinedContext;
+ for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames();
+ i != n; i++) {
+ DILineInfo LineInfo = InlinedContext.getFrame(i);
+ if (i == n - 1) {
+ std::string Function;
+ if (getFunctionNameFromSymbolTable(ModuleOffset, Function)) {
+ patchFunctionNameInDILineInfo(Function, LineInfo);
+ }
+ }
+ PatchedInlinedContext.addFrame(LineInfo);
+ }
+ InlinedContext = PatchedInlinedContext;
+ }
+ return InlinedContext;
+ }
+
+ private:
+ bool getFunctionNameFromSymbolTable(uint64_t Address,
+ std::string &FunctionName) const {
+ assert(Module);
+ error_code ec;
+ for (symbol_iterator si = Module->begin_symbols(),
+ se = Module->end_symbols();
+ si != se; si.increment(ec)) {
+ if (error(ec)) return false;
+ uint64_t SymbolAddress;
+ uint64_t SymbolSize;
+ SymbolRef::Type SymbolType;
+ if (error(si->getAddress(SymbolAddress)) ||
+ SymbolAddress == UnknownAddressOrSize) continue;
+ if (error(si->getSize(SymbolSize)) ||
+ SymbolSize == UnknownAddressOrSize) continue;
+ if (error(si->getType(SymbolType))) continue;
+ // FIXME: If a function has an alias, there are two entries in the symbol
+ // table with the same address and size. Make sure we choose the correct one.
+ if (SymbolAddress <= Address && Address < SymbolAddress + SymbolSize &&
+ SymbolType == SymbolRef::ST_Function) {
+ StringRef Name;
+ if (error(si->getName(Name))) continue;
+ FunctionName = Name.str();
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+typedef std::map<std::string, ModuleInfo*> ModuleMapTy;
+typedef ModuleMapTy::iterator ModuleMapIter;
+} // namespace
+
+static ModuleMapTy Modules;
+
+// Returns true if the object endianness is known.
+static bool getObjectEndianness(const ObjectFile *Obj,
+ bool &IsLittleEndian) {
+ // FIXME: Implement this when libLLVMObject makes it easy to do.
+ IsLittleEndian = true;
+ return true;
+}
+
+static ObjectFile *getObjectFile(const std::string &Path) {
+ OwningPtr<MemoryBuffer> Buff;
+ MemoryBuffer::getFile(Path, Buff);
+ return ObjectFile::createObjectFile(Buff.take());
+}
+
+static std::string getDarwinDWARFResourceForModule(const std::string &Path) {
+ StringRef Basename = sys::path::filename(Path);
+ const std::string &DSymDirectory = Path + ".dSYM";
+ SmallString<16> ResourceName = StringRef(DSymDirectory);
+ sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
+ sys::path::append(ResourceName, Basename);
+ return ResourceName.str();
+}
+
+static ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName) {
+ ModuleMapIter I = Modules.find(ModuleName);
+ if (I != Modules.end())
+ return I->second;
+
+ ObjectFile *Obj = getObjectFile(ModuleName);
+ ObjectFile *DbgObj = Obj;
+ if (Obj == 0) {
+ // Module name doesn't point to a valid object file.
+ Modules.insert(make_pair(ModuleName, (ModuleInfo*)0));
+ return 0;
+ }
+
+ DIContext *Context = 0;
+ bool IsLittleEndian;
+ if (getObjectEndianness(Obj, IsLittleEndian)) {
+ // On Darwin we may find DWARF in separate object file in
+ // resource directory.
+ if (isa<MachOObjectFile>(Obj)) {
+ const std::string &ResourceName = getDarwinDWARFResourceForModule(
+ ModuleName);
+ ObjectFile *ResourceObj = getObjectFile(ResourceName);
+ if (ResourceObj != 0)
+ DbgObj = ResourceObj;
+ }
+ Context = DIContext::getDWARFContext(DbgObj);
+ assert(Context);
+ }
+
+ ModuleInfo *Info = new ModuleInfo(Obj, Context);
+ Modules.insert(make_pair(ModuleName, Info));
+ return Info;
+}
+
+#if !defined(_MSC_VER)
+// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
+extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
+ size_t *length, int *status);
+#endif
+
+static void printDILineInfo(DILineInfo LineInfo) {
+ // By default, DILineInfo contains "<invalid>" for function/filename it
+ // cannot fetch. We replace it with "??" to make our output closer to addr2line.
+ static const std::string kDILineInfoBadString = "<invalid>";
+ static const std::string kSymbolizerBadString = "??";
+ if (PrintFunctions) {
+ std::string FunctionName = LineInfo.getFunctionName();
+ if (FunctionName == kDILineInfoBadString)
+ FunctionName = kSymbolizerBadString;
+#if !defined(_MSC_VER)
+ if (Demangle) {
+ int status = 0;
+ char *DemangledName = __cxa_demangle(
+ FunctionName.c_str(), 0, 0, &status);
+ if (status == 0) {
+ FunctionName = DemangledName;
+ free(DemangledName);
+ }
+ }
+#endif
+ outs() << FunctionName << "\n";
+ }
+ std::string Filename = LineInfo.getFileName();
+ if (Filename == kDILineInfoBadString)
+ Filename = kSymbolizerBadString;
+ outs() << Filename <<
+ ":" << LineInfo.getLine() <<
+ ":" << LineInfo.getColumn() <<
+ "\n";
+}
+
+static void symbolize(std::string ModuleName, std::string ModuleOffsetStr) {
+ ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
+ uint64_t Offset = 0;
+ if (Info == 0 ||
+ StringRef(ModuleOffsetStr).getAsInteger(0, Offset)) {
+ printDILineInfo(DILineInfo());
+ } else if (PrintInlining) {
+ DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(Offset);
+ uint32_t FramesNum = InlinedContext.getNumberOfFrames();
+ assert(FramesNum > 0);
+ for (uint32_t i = 0; i < FramesNum; i++) {
+ DILineInfo LineInfo = InlinedContext.getFrame(i);
+ printDILineInfo(LineInfo);
+ }
+ } else {
+ DILineInfo LineInfo = Info->symbolizeCode(Offset);
+ printDILineInfo(LineInfo);
+ }
+
+ outs() << "\n"; // Print extra empty line to mark the end of output.
+ outs().flush();
+}
+
+static bool parseModuleNameAndOffset(std::string &ModuleName,
+ std::string &ModuleOffsetStr) {
+ static const int kMaxInputStringLength = 1024;
+ static const char kDelimiters[] = " \n";
+ char InputString[kMaxInputStringLength];
+ if (!fgets(InputString, sizeof(InputString), stdin))
+ return false;
+ ModuleName = "";
+ ModuleOffsetStr = "";
+ // FIXME: Handle case when filename is given in quotes.
+ if (char *FilePath = strtok(InputString, kDelimiters)) {
+ ModuleName = FilePath;
+ if (char *OffsetStr = strtok((char*)0, kDelimiters))
+ ModuleOffsetStr = OffsetStr;
+ }
+ return true;
+}
+
+int main(int argc, char **argv) {
+ // Print stack trace if we signal out.
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ cl::ParseCommandLineOptions(argc, argv, "llvm symbolizer for compiler-rt\n");
+ ToolInvocationPath = argv[0];
+
+ std::string ModuleName;
+ std::string ModuleOffsetStr;
+ while (parseModuleNameAndOffset(ModuleName, ModuleOffsetStr)) {
+ symbolize(ModuleName, ModuleOffsetStr);
+ }
+ return 0;
+}
diff --git a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
index 911297609b..a004bad189 100644
--- a/tools/lto/CMakeLists.txt
+++ b/tools/lto/CMakeLists.txt
@@ -6,6 +6,7 @@ add_definitions( -DLLVM_VERSION_INFO=\"${PACKAGE_VERSION}\" )
set(SOURCES
LTOCodeGenerator.cpp
+ LTODisassembler.cpp
lto.cpp
LTOModule.cpp
)
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 5d79fda5aa..2c7a09b966 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -490,7 +490,9 @@ void LTOCodeGenerator::applyScopeRestrictions() {
passes.add(createInternalizePass(mustPreserveList));
// apply scope restrictions
+ passes.doInitialization();
passes.run(*mergedModule);
+ passes.doFinalization();
_scopeRestrictionsDone = true;
}
@@ -545,7 +547,9 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
}
// Run our queue of passes all at once now, efficiently.
+ passes.doInitialization();
passes.run(*mergedModule);
+ passes.doFinalization();
// Run the code generator, and write assembly file
codeGenPasses->doInitialization();
diff --git a/tools/lto/LTODisassembler.cpp b/tools/lto/LTODisassembler.cpp
new file mode 100644
index 0000000000..186acebc01
--- /dev/null
+++ b/tools/lto/LTODisassembler.cpp
@@ -0,0 +1,26 @@
+//===-- LTODisassembler.cpp - LTO Disassembler interface ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility functions used by clients of libLTO that want
+// to use the disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/lto.h"
+#include "llvm/Support/TargetSelect.h"
+
+using namespace llvm;
+
+void lto_initialize_disassembler() {
+ // Initialize targets and assembly printers/parsers.
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+ llvm::InitializeAllAsmParsers();
+ llvm::InitializeAllDisassemblers();
+}
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index e589c5d2c6..024951f88c 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -2,6 +2,7 @@ lto_add_command_line_option
lto_parse_command_line_options
lto_get_error_message
lto_get_version
+lto_initialize_disassembler
lto_module_create
lto_module_create_from_fd
lto_module_create_from_fd_at_offset
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 0390bc470a..2f91207bca 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -821,7 +821,9 @@ int main(int argc, char **argv) {
cl::PrintOptionValues();
// Now that we have all of the passes ready, run them.
+ Passes.doInitialization();
Passes.run(*M.get());
+ Passes.doFinalization();
// Declare success.
if (!NoOutput || PrintBreakpoints)
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 59604dfbf5..6e54449beb 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -118,13 +118,14 @@ public:
Base->endFunctionBody(F, FunctionStart, FunctionEnd);
}
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID) {
- return Base->allocateDataSection(Size, Alignment, SectionID);
+ unsigned SectionID, bool IsReadOnly) {
+ return Base->allocateDataSection(Size, Alignment, SectionID, IsReadOnly);
}
virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID) {
return Base->allocateCodeSection(Size, Alignment, SectionID);
}
+ virtual bool applyPermissions(std::string *ErrMsg) { return false; }
virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
return Base->allocateSpace(Size, Alignment);
}
diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
index d6baf3c9bb..225106ecab 100644
--- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -32,7 +32,8 @@ namespace llvm {
uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
unsigned Alignment,
- unsigned SectionID) {
+ unsigned SectionID,
+ bool IsReadOnly) {
if (!Alignment)
Alignment = 16;
// Ensure that enough memory is requested to allow aligning.
diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
index e44217c906..968ee63ffd 100644
--- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
+++ b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
@@ -34,7 +34,9 @@ public:
unsigned SectionID);
virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID);
+ unsigned SectionID, bool IsReadOnly);
+
+ virtual bool applyPermissions(std::string *ErrMsg) { return false; }
virtual void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true);
diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp
index f01e660939..a9be1c8415 100644
--- a/unittests/Support/AlignOfTest.cpp
+++ b/unittests/Support/AlignOfTest.cpp
@@ -22,6 +22,18 @@ namespace {
#pragma warning(disable:4584)
#endif
+// Suppress direct base '{anonymous}::S1' inaccessible in '{anonymous}::D9'
+// due to ambiguity warning.
+//
+// Pragma based warning suppression was introduced in GCC 4.2. Additionally
+// this warning is "enabled by default". The warning still appears if -Wall is
+// suppressed. Apparently GCC suppresses it when -w is specified, which is odd.
+// At any rate, clang on the other hand gripes about -Wunknown-pragma, so
+// leaving it out of this.
+#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 && !defined(__clang__)
+#pragma GCC diagnostic warning "-w"
+#endif
+
// Define some fixed alignment types to use in these tests.
#if __has_feature(cxx_alignas)
struct alignas(1) A1 { };
diff --git a/unittests/Support/MemoryTest.cpp b/unittests/Support/MemoryTest.cpp
index 21cb27eaf0..fcf9aebad2 100644
--- a/unittests/Support/MemoryTest.cpp
+++ b/unittests/Support/MemoryTest.cpp
@@ -1,356 +1,356 @@
-//===- llvm/unittest/Support/AllocatorTest.cpp - BumpPtrAllocator tests ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Process.h"
-
-#include "gtest/gtest.h"
-#include <cstdlib>
-
-using namespace llvm;
-using namespace sys;
-
-namespace {
-
-class MappedMemoryTest : public ::testing::TestWithParam<unsigned> {
-public:
- MappedMemoryTest() {
- Flags = GetParam();
- PageSize = sys::Process::GetPageSize();
- }
-
-protected:
- // Adds RW flags to permit testing of the resulting memory
- unsigned getTestableEquivalent(unsigned RequestedFlags) {
- switch (RequestedFlags) {
- case Memory::MF_READ:
- case Memory::MF_WRITE:
- case Memory::MF_READ|Memory::MF_WRITE:
- return Memory::MF_READ|Memory::MF_WRITE;
- case Memory::MF_READ|Memory::MF_EXEC:
- case Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC:
- case Memory::MF_EXEC:
- return Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC;
- }
- // Default in case values are added to the enum, as required by some compilers
- return Memory::MF_READ|Memory::MF_WRITE;
- }
-
- // Returns true if the memory blocks overlap
- bool doesOverlap(MemoryBlock M1, MemoryBlock M2) {
- if (M1.base() == M2.base())
- return true;
-
- if (M1.base() > M2.base())
- return (unsigned char *)M2.base() + M2.size() > M1.base();
-
- return (unsigned char *)M1.base() + M1.size() > M2.base();
- }
-
- unsigned Flags;
- size_t PageSize;
-};
-
-TEST_P(MappedMemoryTest, AllocAndRelease) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(sizeof(int), M1.size());
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
-}
-
-TEST_P(MappedMemoryTest, MultipleAllocAndRelease) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- EXPECT_NE((void*)0, M4.base());
- EXPECT_LE(16U, M4.size());
- EXPECT_FALSE(Memory::releaseMappedMemory(M4));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, BasicWrite) {
- // This test applies only to writeable combinations
- if (Flags && !(Flags & Memory::MF_WRITE))
- return;
-
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(sizeof(int), M1.size());
-
- int *a = (int*)M1.base();
- *a = 1;
- EXPECT_EQ(1, *a);
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
-}
-
-TEST_P(MappedMemoryTest, MultipleWrite) {
- // This test applies only to writeable combinations
- if (Flags && !(Flags & Memory::MF_WRITE))
- return;
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(1U * sizeof(int), M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(8U * sizeof(int), M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(4U * sizeof(int), M3.size());
-
- int *x = (int*)M1.base();
- *x = 1;
-
- int *y = (int*)M2.base();
- for (int i = 0; i < 8; i++) {
- y[i] = i;
- }
-
- int *z = (int*)M3.base();
- *z = 42;
-
- EXPECT_EQ(1, *x);
- EXPECT_EQ(7, y[7]);
- EXPECT_EQ(42, *z);
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
-
- MemoryBlock M4 = Memory::allocateMappedMemory(64 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- EXPECT_NE((void*)0, M4.base());
- EXPECT_LE(64U * sizeof(int), M4.size());
- x = (int*)M4.base();
- *x = 4;
- EXPECT_EQ(4, *x);
- EXPECT_FALSE(Memory::releaseMappedMemory(M4));
-
- // Verify that M2 remains unaffected by other activity
- for (int i = 0; i < 8; i++) {
- EXPECT_EQ(i, y[i]);
- }
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, EnabledWrite) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(2 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(2U * sizeof(int), M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(8U * sizeof(int), M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(4U * sizeof(int), M3.size());
-
- EXPECT_FALSE(Memory::protectMappedMemory(M1, getTestableEquivalent(Flags)));
- EXPECT_FALSE(Memory::protectMappedMemory(M2, getTestableEquivalent(Flags)));
- EXPECT_FALSE(Memory::protectMappedMemory(M3, getTestableEquivalent(Flags)));
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- int *x = (int*)M1.base();
- *x = 1;
- int *y = (int*)M2.base();
- for (unsigned int i = 0; i < 8; i++) {
- y[i] = i;
- }
- int *z = (int*)M3.base();
- *z = 42;
-
- EXPECT_EQ(1, *x);
- EXPECT_EQ(7, y[7]);
- EXPECT_EQ(42, *z);
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_EQ(6, y[6]);
-
- MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- EXPECT_NE((void*)0, M4.base());
- EXPECT_LE(16U, M4.size());
- EXPECT_EQ(error_code::success(), Memory::protectMappedMemory(M4, getTestableEquivalent(Flags)));
- x = (int*)M4.base();
- *x = 4;
- EXPECT_EQ(4, *x);
- EXPECT_FALSE(Memory::releaseMappedMemory(M4));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, SuccessiveNear) {
- error_code EC;
- MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &M1, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &M2, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, DuplicateNear) {
- error_code EC;
- MemoryBlock Near((void*)(3*PageSize), 16);
- MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, ZeroNear) {
- error_code EC;
- MemoryBlock Near(0, 0);
- MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, ZeroSizeNear) {
- error_code EC;
- MemoryBlock Near((void*)(4*PageSize), 0);
- MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
- MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(16U, M1.size());
- EXPECT_NE((void*)0, M2.base());
- EXPECT_LE(64U, M2.size());
- EXPECT_NE((void*)0, M3.base());
- EXPECT_LE(32U, M3.size());
-
- EXPECT_FALSE(doesOverlap(M1, M2));
- EXPECT_FALSE(doesOverlap(M2, M3));
- EXPECT_FALSE(doesOverlap(M1, M3));
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
- EXPECT_FALSE(Memory::releaseMappedMemory(M3));
- EXPECT_FALSE(Memory::releaseMappedMemory(M2));
-}
-
-TEST_P(MappedMemoryTest, UnalignedNear) {
- error_code EC;
- MemoryBlock Near((void*)(2*PageSize+5), 0);
- MemoryBlock M1 = Memory::allocateMappedMemory(15, &Near, Flags, EC);
- EXPECT_EQ(error_code::success(), EC);
-
- EXPECT_NE((void*)0, M1.base());
- EXPECT_LE(sizeof(int), M1.size());
-
- EXPECT_FALSE(Memory::releaseMappedMemory(M1));
-}
-
-// Note that Memory::MF_WRITE is not supported exclusively across
-// operating systems and architectures and can imply MF_READ|MF_WRITE
-unsigned MemoryFlags[] = {
- Memory::MF_READ,
- Memory::MF_WRITE,
- Memory::MF_READ|Memory::MF_WRITE,
- Memory::MF_EXEC,
- Memory::MF_READ|Memory::MF_EXEC,
- Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC
- };
-
-INSTANTIATE_TEST_CASE_P(AllocationTests,
- MappedMemoryTest,
- ::testing::ValuesIn(MemoryFlags));
-
-} // anonymous namespace
+//===- llvm/unittest/Support/MemoryTest.cpp - MappedMemory tests ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Process.h"
+
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace llvm;
+using namespace sys;
+
+namespace {
+
+class MappedMemoryTest : public ::testing::TestWithParam<unsigned> {
+public:
+ MappedMemoryTest() {
+ Flags = GetParam();
+ PageSize = sys::Process::GetPageSize();
+ }
+
+protected:
+ // Adds RW flags to permit testing of the resulting memory
+ unsigned getTestableEquivalent(unsigned RequestedFlags) {
+ switch (RequestedFlags) {
+ case Memory::MF_READ:
+ case Memory::MF_WRITE:
+ case Memory::MF_READ|Memory::MF_WRITE:
+ return Memory::MF_READ|Memory::MF_WRITE;
+ case Memory::MF_READ|Memory::MF_EXEC:
+ case Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC:
+ case Memory::MF_EXEC:
+ return Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC;
+ }
+ // Default in case values are added to the enum, as required by some compilers
+ return Memory::MF_READ|Memory::MF_WRITE;
+ }
+
+ // Returns true if the memory blocks overlap
+ bool doesOverlap(MemoryBlock M1, MemoryBlock M2) {
+ if (M1.base() == M2.base())
+ return true;
+
+ if (M1.base() > M2.base())
+ return (unsigned char *)M2.base() + M2.size() > M1.base();
+
+ return (unsigned char *)M1.base() + M1.size() > M2.base();
+ }
+
+ unsigned Flags;
+ size_t PageSize;
+};
+
+TEST_P(MappedMemoryTest, AllocAndRelease) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(sizeof(int), M1.size());
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+TEST_P(MappedMemoryTest, MultipleAllocAndRelease) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ EXPECT_NE((void*)0, M4.base());
+ EXPECT_LE(16U, M4.size());
+ EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, BasicWrite) {
+ // This test applies only to writeable combinations
+ if (Flags && !(Flags & Memory::MF_WRITE))
+ return;
+
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(sizeof(int), M1.size());
+
+ int *a = (int*)M1.base();
+ *a = 1;
+ EXPECT_EQ(1, *a);
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+TEST_P(MappedMemoryTest, MultipleWrite) {
+ // This test applies only to writeable combinations
+ if (Flags && !(Flags & Memory::MF_WRITE))
+ return;
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(1U * sizeof(int), M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(8U * sizeof(int), M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(4U * sizeof(int), M3.size());
+
+ int *x = (int*)M1.base();
+ *x = 1;
+
+ int *y = (int*)M2.base();
+ for (int i = 0; i < 8; i++) {
+ y[i] = i;
+ }
+
+ int *z = (int*)M3.base();
+ *z = 42;
+
+ EXPECT_EQ(1, *x);
+ EXPECT_EQ(7, y[7]);
+ EXPECT_EQ(42, *z);
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+
+ MemoryBlock M4 = Memory::allocateMappedMemory(64 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ EXPECT_NE((void*)0, M4.base());
+ EXPECT_LE(64U * sizeof(int), M4.size());
+ x = (int*)M4.base();
+ *x = 4;
+ EXPECT_EQ(4, *x);
+ EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+
+ // Verify that M2 remains unaffected by other activity
+ for (int i = 0; i < 8; i++) {
+ EXPECT_EQ(i, y[i]);
+ }
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, EnabledWrite) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(2 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(8 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(4 * sizeof(int), 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(2U * sizeof(int), M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(8U * sizeof(int), M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(4U * sizeof(int), M3.size());
+
+ EXPECT_FALSE(Memory::protectMappedMemory(M1, getTestableEquivalent(Flags)));
+ EXPECT_FALSE(Memory::protectMappedMemory(M2, getTestableEquivalent(Flags)));
+ EXPECT_FALSE(Memory::protectMappedMemory(M3, getTestableEquivalent(Flags)));
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ int *x = (int*)M1.base();
+ *x = 1;
+ int *y = (int*)M2.base();
+ for (unsigned int i = 0; i < 8; i++) {
+ y[i] = i;
+ }
+ int *z = (int*)M3.base();
+ *z = 42;
+
+ EXPECT_EQ(1, *x);
+ EXPECT_EQ(7, y[7]);
+ EXPECT_EQ(42, *z);
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_EQ(6, y[6]);
+
+ MemoryBlock M4 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ EXPECT_NE((void*)0, M4.base());
+ EXPECT_LE(16U, M4.size());
+ EXPECT_EQ(error_code::success(), Memory::protectMappedMemory(M4, getTestableEquivalent(Flags)));
+ x = (int*)M4.base();
+ *x = 4;
+ EXPECT_EQ(4, *x);
+ EXPECT_FALSE(Memory::releaseMappedMemory(M4));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, SuccessiveNear) {
+ error_code EC;
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, 0, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &M1, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &M2, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, DuplicateNear) {
+ error_code EC;
+ MemoryBlock Near((void*)(3*PageSize), 16);
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, ZeroNear) {
+ error_code EC;
+ MemoryBlock Near(0, 0);
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, ZeroSizeNear) {
+ error_code EC;
+ MemoryBlock Near((void*)(4*PageSize), 0);
+ MemoryBlock M1 = Memory::allocateMappedMemory(16, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M2 = Memory::allocateMappedMemory(64, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+ MemoryBlock M3 = Memory::allocateMappedMemory(32, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(16U, M1.size());
+ EXPECT_NE((void*)0, M2.base());
+ EXPECT_LE(64U, M2.size());
+ EXPECT_NE((void*)0, M3.base());
+ EXPECT_LE(32U, M3.size());
+
+ EXPECT_FALSE(doesOverlap(M1, M2));
+ EXPECT_FALSE(doesOverlap(M2, M3));
+ EXPECT_FALSE(doesOverlap(M1, M3));
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M3));
+ EXPECT_FALSE(Memory::releaseMappedMemory(M2));
+}
+
+TEST_P(MappedMemoryTest, UnalignedNear) {
+ error_code EC;
+ MemoryBlock Near((void*)(2*PageSize+5), 0);
+ MemoryBlock M1 = Memory::allocateMappedMemory(15, &Near, Flags, EC);
+ EXPECT_EQ(error_code::success(), EC);
+
+ EXPECT_NE((void*)0, M1.base());
+ EXPECT_LE(sizeof(int), M1.size());
+
+ EXPECT_FALSE(Memory::releaseMappedMemory(M1));
+}
+
+// Note that Memory::MF_WRITE is not supported exclusively across
+// operating systems and architectures and can imply MF_READ|MF_WRITE
+unsigned MemoryFlags[] = {
+ Memory::MF_READ,
+ Memory::MF_WRITE,
+ Memory::MF_READ|Memory::MF_WRITE,
+ Memory::MF_EXEC,
+ Memory::MF_READ|Memory::MF_EXEC,
+ Memory::MF_READ|Memory::MF_WRITE|Memory::MF_EXEC
+ };
+
+INSTANTIATE_TEST_CASE_P(AllocationTests,
+ MappedMemoryTest,
+ ::testing::ValuesIn(MemoryFlags));
+
+} // anonymous namespace
diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp
index 480a5739f4..e9839358a0 100644
--- a/unittests/Support/YAMLParserTest.cpp
+++ b/unittests/Support/YAMLParserTest.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "gtest/gtest.h"
@@ -21,6 +22,12 @@ static void SuppressDiagnosticsOutput(const SMDiagnostic &, void *) {
// to reduce noise in unit test runs.
}
+// Assumes Ctx points to an SMDiagnostic in which Diag can be stored.
+static void CollectDiagnosticsOutput(const SMDiagnostic &Diag, void *Ctx) {
+ SMDiagnostic* DiagOut = static_cast<SMDiagnostic*>(Ctx);
+ *DiagOut = Diag;
+}
+
// Checks that the given input gives a parse error. Makes sure that an error
// text is available and the parse fails.
static void ExpectParseError(StringRef Message, StringRef Input) {
@@ -182,4 +189,31 @@ TEST(YAMLParser, WorksWithIteratorAlgorithms) {
EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
}
+TEST(YAMLParser, DefaultDiagnosticFilename) {
+ SourceMgr SM;
+
+ SMDiagnostic GeneratedDiag;
+ SM.setDiagHandler(CollectDiagnosticsOutput, &GeneratedDiag);
+
+ // When we construct a YAML stream over an unnamed string,
+ // the filename is hard-coded as "YAML".
+ yaml::Stream UnnamedStream("[]", SM);
+ UnnamedStream.printError(UnnamedStream.begin()->getRoot(), "Hello, World!");
+ EXPECT_EQ("YAML", GeneratedDiag.getFilename());
+}
+
+TEST(YAMLParser, DiagnosticFilenameFromBufferID) {
+ SourceMgr SM;
+
+ SMDiagnostic GeneratedDiag;
+ SM.setDiagHandler(CollectDiagnosticsOutput, &GeneratedDiag);
+
+ // When we construct a YAML stream over a named buffer,
+ // we get its ID as filename in diagnostics.
+ MemoryBuffer* Buffer = MemoryBuffer::getMemBuffer("[]", "buffername.yaml");
+ yaml::Stream Stream(Buffer, SM);
+ Stream.printError(Stream.begin()->getRoot(), "Hello, World!");
+ EXPECT_EQ("buffername.yaml", GeneratedDiag.getFilename());
+}
+
} // end namespace llvm
diff --git a/unittests/VMCore/CMakeLists.txt b/unittests/VMCore/CMakeLists.txt
index 4025c7a91f..8d8bb3bb4d 100644
--- a/unittests/VMCore/CMakeLists.txt
+++ b/unittests/VMCore/CMakeLists.txt
@@ -16,6 +16,7 @@ set(VMCoreSources
TypesTest.cpp
ValueMapTest.cpp
VerifierTest.cpp
+ WaymarkTest.cpp
)
# MSVC9 and 8 cannot compile ValueMapTest.cpp due to their bug.
diff --git a/unittests/VMCore/ConstantsTest.cpp b/unittests/VMCore/ConstantsTest.cpp
index 623ea0d102..25d61cc6ca 100644
--- a/unittests/VMCore/ConstantsTest.cpp
+++ b/unittests/VMCore/ConstantsTest.cpp
@@ -8,8 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Constants.h"
+#include "llvm/Instruction.h"
+#include "llvm/InstrTypes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "gtest/gtest.h"
namespace llvm {
@@ -118,5 +121,108 @@ TEST(ConstantsTest, FP128Test) {
EXPECT_TRUE(isa<ConstantFP>(X));
}
+#define CHECK(x, y) { \
+ std::string __s; \
+ raw_string_ostream __o(__s); \
+ cast<ConstantExpr>(x)->getAsInstruction()->print(__o); \
+ __o.flush(); \
+ EXPECT_EQ(std::string(" <badref> = " y), __s); \
+ }
+
+TEST(ConstantsTest, AsInstructionsTest) {
+ Module *M = new Module("MyModule", getGlobalContext());
+
+ Type *Int64Ty = Type::getInt64Ty(getGlobalContext());
+ Type *Int32Ty = Type::getInt32Ty(getGlobalContext());
+ Type *Int16Ty = Type::getInt16Ty(getGlobalContext());
+ Type *Int1Ty = Type::getInt1Ty(getGlobalContext());
+ Type *FloatTy = Type::getFloatTy(getGlobalContext());
+ Type *DoubleTy = Type::getDoubleTy(getGlobalContext());
+
+ Constant *Global = M->getOrInsertGlobal("dummy",
+ PointerType::getUnqual(Int32Ty));
+ Constant *Global2 = M->getOrInsertGlobal("dummy2",
+ PointerType::getUnqual(Int32Ty));
+
+ Constant *P0 = ConstantExpr::getPtrToInt(Global, Int32Ty);
+ Constant *P1 = ConstantExpr::getUIToFP(P0, FloatTy);
+ Constant *P2 = ConstantExpr::getUIToFP(P0, DoubleTy);
+ Constant *P3 = ConstantExpr::getTrunc(P0, Int1Ty);
+ Constant *P4 = ConstantExpr::getPtrToInt(Global2, Int32Ty);
+ Constant *P5 = ConstantExpr::getUIToFP(P4, FloatTy);
+ Constant *P6 = ConstantExpr::getBitCast(P4, VectorType::get(Int16Ty, 2));
+
+ Constant *One = ConstantInt::get(Int32Ty, 1);
+
+ #define P0STR "ptrtoint (i32** @dummy to i32)"
+ #define P1STR "uitofp (i32 ptrtoint (i32** @dummy to i32) to float)"
+ #define P2STR "uitofp (i32 ptrtoint (i32** @dummy to i32) to double)"
+ #define P3STR "ptrtoint (i32** @dummy to i1)"
+ #define P4STR "ptrtoint (i32** @dummy2 to i32)"
+ #define P5STR "uitofp (i32 ptrtoint (i32** @dummy2 to i32) to float)"
+ #define P6STR "bitcast (i32 ptrtoint (i32** @dummy2 to i32) to <2 x i16>)"
+
+ CHECK(ConstantExpr::getNeg(P0), "sub i32 0, " P0STR);
+ CHECK(ConstantExpr::getFNeg(P1), "fsub float -0.000000e+00, " P1STR);
+ CHECK(ConstantExpr::getNot(P0), "xor i32 " P0STR ", -1");
+ CHECK(ConstantExpr::getAdd(P0, P0), "add i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getAdd(P0, P0, false, true), "add nsw i32 " P0STR ", "
+ P0STR);
+ CHECK(ConstantExpr::getAdd(P0, P0, true, true), "add nuw nsw i32 " P0STR ", "
+ P0STR);
+ CHECK(ConstantExpr::getFAdd(P1, P1), "fadd float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getSub(P0, P0), "sub i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFSub(P1, P1), "fsub float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getMul(P0, P0), "mul i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFMul(P1, P1), "fmul float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getUDiv(P0, P0), "udiv i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getSDiv(P0, P0), "sdiv i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFDiv(P1, P1), "fdiv float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getURem(P0, P0), "urem i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getSRem(P0, P0), "srem i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getFRem(P1, P1), "frem float " P1STR ", " P1STR);
+ CHECK(ConstantExpr::getAnd(P0, P0), "and i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getOr(P0, P0), "or i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getXor(P0, P0), "xor i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getShl(P0, P0), "shl i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getShl(P0, P0, true), "shl nuw i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getShl(P0, P0, false, true), "shl nsw i32 " P0STR ", "
+ P0STR);
+ CHECK(ConstantExpr::getLShr(P0, P0, false), "lshr i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getLShr(P0, P0, true), "lshr exact i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getAShr(P0, P0, false), "ashr i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getAShr(P0, P0, true), "ashr exact i32 " P0STR ", " P0STR);
+
+ CHECK(ConstantExpr::getSExt(P0, Int64Ty), "sext i32 " P0STR " to i64");
+ CHECK(ConstantExpr::getZExt(P0, Int64Ty), "zext i32 " P0STR " to i64");
+ CHECK(ConstantExpr::getFPTrunc(P2, FloatTy), "fptrunc double " P2STR
+ " to float");
+ CHECK(ConstantExpr::getFPExtend(P1, DoubleTy), "fpext float " P1STR
+ " to double");
+
+ CHECK(ConstantExpr::getExactUDiv(P0, P0), "udiv exact i32 " P0STR ", " P0STR);
+
+ CHECK(ConstantExpr::getSelect(P3, P0, P4), "select i1 " P3STR ", i32 " P0STR
+ ", i32 " P4STR);
+ CHECK(ConstantExpr::getICmp(CmpInst::ICMP_EQ, P0, P4), "icmp eq i32 " P0STR
+ ", " P4STR);
+ CHECK(ConstantExpr::getFCmp(CmpInst::FCMP_ULT, P1, P5), "fcmp ult float "
+ P1STR ", " P5STR);
+
+ std::vector<Constant*> V;
+ V.push_back(One);
+ // FIXME: getGetElementPtr() actually creates an inbounds ConstantGEP,
+ // not a normal one!
+ //CHECK(ConstantExpr::getGetElementPtr(Global, V, false),
+ // "getelementptr i32** @dummy, i32 1");
+ CHECK(ConstantExpr::getInBoundsGetElementPtr(Global, V),
+ "getelementptr inbounds i32** @dummy, i32 1");
+
+ CHECK(ConstantExpr::getExtractElement(P6, One), "extractelement <2 x i16> "
+ P6STR ", i32 1");
+}
+
+#undef CHECK
+
} // end anonymous namespace
} // end namespace llvm
diff --git a/unittests/VMCore/WaymarkTest.cpp b/unittests/VMCore/WaymarkTest.cpp
new file mode 100644
index 0000000000..9005b0c988
--- /dev/null
+++ b/unittests/VMCore/WaymarkTest.cpp
@@ -0,0 +1,54 @@
+//===- llvm/unittest/VMCore/WaymarkTest.cpp - getUser() unit tests --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// we perform white-box tests
+//
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+
+namespace llvm {
+namespace {
+
+// Wraps a single character in an 8-bit integer constant of the global context.
+Constant *char2constant(char c) {
+  IntegerType *Int8Ty = Type::getInt8Ty(getGlobalContext());
+  return ConstantInt::get(Int8Ty, c);
+}
+
+
+// Creates a call instruction from 22 constant arguments and checks that the
+// Use objects at operand indices 0..21 all report that call as their user.
+TEST(WaymarkTest, NativeArray) {
+ // 21 characters plus the terminating NUL fill all 22 array slots.
+ static uint8_t tail[22] = "s02s33s30y2y0s1x0syxS";
+ Value * values[22];
+ // Turn every byte of the string (NUL included) into an i8 constant.
+ std::transform(tail, tail + 22, values, char2constant);
+ FunctionType *FT = FunctionType::get(Type::getVoidTy(getGlobalContext()), true);
+ Function *F = Function::Create(FT, GlobalValue::ExternalLinkage);
+ const CallInst *A = CallInst::Create(F, makeArrayRef(values));
+ ASSERT_NE(A, (const CallInst*)NULL);
+ // One operand more than the 22 arguments is expected (presumably the callee
+ // -- TODO confirm).
+ ASSERT_EQ(1U + 22, A->getNumOperands());
+ const Use *U = &A->getOperandUse(0);
+ // The Use at index 22 only serves as the end marker; it is not checked.
+ const Use *Ue = &A->getOperandUse(22);
+ for (; U != Ue; ++U)
+ {
+ EXPECT_EQ(A, U->getUser());
+ }
+ // NOTE(review): neither A nor F is released anywhere in this test.
+}
+
+// Tags a raw, zero-filled array of Use objects with Use::initTags() and checks
+// that getUser() on each tagged entry resolves to the address just past the
+// tagged range.
+TEST(WaymarkTest, TwoBit) {
+  // calloc takes (element count, element size). One slot beyond the 8212
+  // tagged entries is allocated as well.
+  Use* many = (Use*)calloc(8212 + 1, sizeof(Use));
+  ASSERT_TRUE(many);
+  Use::initTags(many, many + 8212);
+  for (const Use *U = many, *Ue = many + 8212 - 1; U != Ue; ++U)
+  {
+    EXPECT_EQ((User*)(Ue + 1), U->getUser());
+  }
+  // The buffer came straight from calloc and has no owner, so release it here.
+  free(many);
+}
+
+} // end anonymous namespace
+} // end namespace llvm
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index e79162867e..c5524656f6 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/Signals.h"
#include "llvm/Support/system_error.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include <algorithm>
using namespace llvm;
@@ -63,6 +64,9 @@ class Pattern {
/// RegEx - If non-empty, this is a regex pattern.
std::string RegExStr;
+ /// \brief The number of the line this pattern is on.
+ unsigned LineNumber;
+
/// VariableUses - Entries in this vector map to uses of a variable in the
/// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
/// "foobaz" and we'll get an entry in this vector that tells us to insert the
@@ -79,7 +83,7 @@ public:
Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
- bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
+ bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
@@ -104,10 +108,16 @@ private:
/// should correspond to a perfect match.
unsigned ComputeMatchDistance(StringRef Buffer,
const StringMap<StringRef> &VariableTable) const;
+
+ /// \brief Evaluates expression and stores the result to \p Value.
+ /// \return true on success. false when the expression has invalid syntax.
+ bool EvaluateExpression(StringRef Expr, std::string &Value) const;
};
-bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
+bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
+ unsigned LineNumber) {
+ this->LineNumber = LineNumber;
PatternLoc = SMLoc::getFromPointer(PatternStr.data());
// Ignore trailing whitespace.
@@ -193,13 +203,28 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
return true;
}
- // Verify that the name is well formed.
- for (unsigned i = 0, e = Name.size(); i != e; ++i)
- if (Name[i] != '_' && !isalnum(Name[i])) {
+ // Verify that the name/expression is well formed. FileCheck currently
+ // supports @LINE, @LINE+number and @LINE-number expressions. The check here
+ // is relaxed; a stricter check is performed in \c EvaluateExpression.
+ bool IsExpression = false;
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ if (i == 0 && Name[i] == '@') {
+ if (NameEnd != StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "invalid name in named regex definition");
+ return true;
+ }
+ IsExpression = true;
+ continue;
+ }
+ if (Name[i] != '_' && !isalnum(Name[i]) &&
+ (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
SourceMgr::DK_Error, "invalid name in named regex");
return true;
}
+ }
// Name can't start with a digit.
if (isdigit(Name[0])) {
@@ -231,7 +256,6 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
PatternStr = PatternStr.substr(FixedMatchEnd);
- continue;
}
return false;
@@ -279,6 +303,24 @@ bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
return false;
}
+bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
+  // Accepted forms: "@LINE", "@LINE+<number>" and "@LINE-<number>".
+  const StringRef Keyword("@LINE");
+  if (!Expr.startswith(Keyword))
+    return false;
+
+  StringRef Rest = Expr.substr(Keyword.size());
+  int Offset = 0;
+  if (!Rest.empty()) {
+    const char Sign = Rest[0];
+    if (Sign != '+' && Sign != '-')
+      return false;
+    // A leading '+' is dropped before parsing; a '-' stays in place so that
+    // getAsInteger yields a negative offset.
+    if (Sign == '+')
+      Rest = Rest.substr(1);
+    if (Rest.getAsInteger(10, Offset))
+      return false;
+  }
+
+  Value = llvm::itostr(LineNumber + Offset);
+  return true;
+}
+
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
@@ -307,15 +349,21 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
unsigned InsertOffset = 0;
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
- StringMap<StringRef>::iterator it =
- VariableTable.find(VariableUses[i].first);
- // If the variable is undefined, return an error.
- if (it == VariableTable.end())
- return StringRef::npos;
-
- // Look up the value and escape it so that we can plop it into the regex.
std::string Value;
- AddFixedStringToRegEx(it->second, Value);
+
+ if (VariableUses[i].first[0] == '@') {
+ if (!EvaluateExpression(VariableUses[i].first, Value))
+ return StringRef::npos;
+ } else {
+ StringMap<StringRef>::iterator it =
+ VariableTable.find(VariableUses[i].first);
+ // If the variable is undefined, return an error.
+ if (it == VariableTable.end())
+ return StringRef::npos;
+
+ // Look up the value and escape it so that we can plop it into the regex.
+ AddFixedStringToRegEx(it->second, Value);
+ }
// Plop it into the regex at the adjusted offset.
TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
@@ -371,19 +419,31 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
// variable values.
if (!VariableUses.empty()) {
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
- StringRef Var = VariableUses[i].first;
- StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
-
- // Check for undefined variable references.
- if (it == VariableTable.end()) {
- OS << "uses undefined variable \"";
- OS.write_escaped(Var) << "\"";;
+ StringRef Var = VariableUses[i].first;
+ if (Var[0] == '@') {
+ std::string Value;
+ if (EvaluateExpression(Var, Value)) {
+ OS << "with expression \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(Value) << "\"";
+ } else {
+ OS << "uses incorrect expression \"";
+ OS.write_escaped(Var) << "\"";
+ }
} else {
- OS << "with variable \"";
- OS.write_escaped(Var) << "\" equal to \"";
- OS.write_escaped(it->second) << "\"";
+ StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
+
+ // Check for undefined variable references.
+ if (it == VariableTable.end()) {
+ OS << "uses undefined variable \"";
+ OS.write_escaped(Var) << "\"";
+ } else {
+ OS << "with variable \"";
+ OS.write_escaped(Var) << "\" equal to \"";
+ OS.write_escaped(it->second) << "\"";
+ }
}
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
@@ -518,14 +578,20 @@ static bool ReadCheckFile(SourceMgr &SM,
std::vector<std::pair<SMLoc, Pattern> > NotMatches;
+ unsigned LineNumber = 1;
+
while (1) {
// See if Prefix occurs in the memory buffer.
- Buffer = Buffer.substr(Buffer.find(CheckPrefix));
-
+ size_t PrefixLoc = Buffer.find(CheckPrefix);
// If we didn't find a match, we're done.
- if (Buffer.empty())
+ if (PrefixLoc == StringRef::npos)
break;
+ // Recalculate line number.
+ LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
+
+ Buffer = Buffer.substr(PrefixLoc);
+
const char *CheckPrefixStart = Buffer.data();
// When we find a check prefix, keep track of whether we find CHECK: or
@@ -560,7 +626,7 @@ static bool ReadCheckFile(SourceMgr &SM,
// Parse the pattern.
Pattern P;
- if (P.ParsePattern(Buffer.substr(0, EOL), SM))
+ if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))
return true;
Buffer = Buffer.substr(EOL);
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 4101076f33..fc7bfe5933 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -582,6 +582,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
REG("cc_out");
REG("s_cc_out");
REG("tGPR");
+ REG("GPRPairOp");
REG("DPR");
REG("DPR_VFP2");
REG("DPR_8");
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index e1910784a6..fe55242930 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -621,7 +621,7 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << " }\n";
OS << " }\n";
- OS << " return AttrListPtr::get(ArrayRef<AttributeWithIndex>(AWI, "
+ OS << " return AttrListPtr::get(C, ArrayRef<AttributeWithIndex>(AWI, "
"NumAttrs));\n";
OS << "}\n";
OS << "#endif // GET_INTRINSIC_ATTRIBUTES\n\n";
diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl
index d92a767adf..c896ea839b 100755
--- a/utils/UpdateCMakeLists.pl
+++ b/utils/UpdateCMakeLists.pl
@@ -68,7 +68,7 @@ sub UpdateCMake {
while(<IN>) {
if (!$foundLibrary) {
print OUT $_;
- if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_executable\(/) {
+ if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_[^_]+_executable\(/) {
$foundLibrary = 1;
EmitCMakeList($dir);
}
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp b/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
index 4bc58d7579..2b60cb9f20 100644
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
@@ -2,7 +2,7 @@
# Do not edit here. If you wish to override these values
# edit the last section
set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend"
+set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips PIC16 XCore MSP430 Blackfin MSIL CppBackend"
set srcroot "/Volumes/Data/ddunbar/llvm"
set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
set srcdir "/Volumes/Data/ddunbar/llvm/test"
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
index 4bc58d7579..2b60cb9f20 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
@@ -2,7 +2,7 @@
# Do not edit here. If you wish to override these values
# edit the last section
set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend"
+set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips PIC16 XCore MSP430 Blackfin MSIL CppBackend"
set srcroot "/Volumes/Data/ddunbar/llvm"
set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
set srcdir "/Volumes/Data/ddunbar/llvm/test"
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 0c1911ed35..e339652f83 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -432,7 +432,9 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
script = []
xfails = []
requires = []
+ line_number = 0
for ln in open(sourcepath):
+ line_number += 1
if 'RUN:' in ln:
# Isolate the command to run.
index = ln.index('RUN:')
@@ -441,6 +443,15 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
# Trim trailing whitespace.
ln = ln.rstrip()
+ # Substitute line number expressions
+ ln = re.sub('%\(line\)', str(line_number), ln)
+ def replace_line_number(match):
+ if match.group(1) == '+':
+ return str(line_number + int(match.group(2)))
+ if match.group(1) == '-':
+ return str(line_number - int(match.group(2)))
+ ln = re.sub('%\(line *([\+-]) *(\d+)\)', replace_line_number, ln)
+
# Collapse lines with trailing '\\'.
if script and script[-1][-1] == '\\':
script[-1] = script[-1][:-1] + ln
diff --git a/utils/wciia.py b/utils/wciia.py
new file mode 100755
index 0000000000..c838819ebe
--- /dev/null
+++ b/utils/wciia.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+
+"""
+wciia - Whose Code Is It Anyway
+
+Determines code owner of the file/folder relative to the llvm source root.
+Code owner is determined from the content of the CODE_OWNERS.TXT
+by parsing the D: field
+
+usage:
+
+utils/wciia.py path
+
+limitations:
+- must be run from llvm source root
+- very simplistic algorithm
+- only handles * as a wildcard
+- not very user friendly
+- does not handle the proposed F: field
+
+"""
+
+import os
+
+code_owners = {}
+
+def process_files_and_folders(owner):
+    """Fill owner['paths'] from the text enclosed by '(' and ')'.
+
+    The text is read from owner['filesfolders'].  Nothing is stored when the
+    index of the last ')' is not greater than the index of the first '('.
+    """
+    text = owner['filesfolders']
+    open_idx = text.find('(')
+    close_idx = text.rfind(')')
+    if close_idx > open_idx:
+        # Whitespace separates the individual paths inside the parentheses.
+        owner['paths'] = text[open_idx + 1:close_idx].split()
+
+def process_code_owner(owner):
+    """Run process_files_and_folders on owner and store it in code_owners.
+
+    The record is keyed by owner['name'].
+    """
+    # Without an F: field the D: description is used as the path source.
+    if 'filesfolders' not in owner:
+        owner['filesfolders'] = owner['description']
+    process_files_and_folders(owner)
+    code_owners[owner['name']] = owner
+
+# Parse CODE_OWNERS.TXT first, building one record per "N:" entry.
+code_owners_file = open("CODE_OWNERS.TXT", "r").readlines()
+code_owner = {}
+for line in code_owners_file:
+    for word in line.split():
+        if word == "N:":
+            name = line[2:].strip()
+            # An N: line opens a new record, so hand off the previous one.
+            if code_owner:
+                process_code_owner(code_owner)
+                code_owner = {}
+            code_owner['name'] = name
+        if word == "E:":
+            email = line[2:].strip()
+            code_owner['email'] = email
+        if word == "D:":
+            description = line[2:].strip()
+            code_owner['description'] = description
+        if word == "F:":
+            filesfolders = line[2:].strip()
+            # Stored as a plain string: process_files_and_folders() calls
+            # str.find() on it, and there is no existing list to append to.
+            code_owner['filesfolders'] = filesfolders
+# No N: line follows the final record, so it has to be flushed explicitly.
+if code_owner:
+    process_code_owner(code_owner)
+
+def find_owners(fpath):
+    """Look up the owner name(s) recorded in code_owners for fpath.
+
+    An owner path identical to fpath returns that single owner name at once.
+    Otherwise the result is a list of every owner that has a wildcard path
+    beginning with fpath, always followed by 'Chris Lattner'.
+    """
+    matches = []
+    for owner_name in code_owners:
+        for pattern in code_owners[owner_name].get('paths', ()):
+            if pattern == fpath:
+                return owner_name
+            # Only patterns with a '*' past the first character take part in
+            # prefix matching, and only when fpath is shorter than the pattern.
+            has_star = pattern.rfind('*') > 0
+            if has_star and len(fpath) < len(pattern) and pattern.find(fpath) == 0:
+                matches.append(owner_name)
+    matches.append('Chris Lattner')
+    return matches
+
+# now let's try to find the owner of the file or folder
+import sys
+
+# exactly one argument is needed: the path to look up
+if len(sys.argv) < 2:
+ print "usage " + sys.argv[0] + " file_or_folder"
+ exit(-1)
+
+# the path we are checking
+path = str(sys.argv[1])
+
+# check that this is a real path
+if not os.path.exists(path):
+ print "path (" + path + ") does not exist"
+ exit(-1)
+
+owners_name = find_owners(path)
+
+# be grammatically correct
+print "The owner(s) of the (" + path + ") is(are) : " + str(owners_name)
+
+exit(0)
+
+# bottom-up walk of the current directory
+# not yet used: the exit(0) above ends the script before this point
+root = "."
+for dir,subdirList,fileList in os.walk( root , topdown=False ) :
+ print "dir :" , dir
+ for fname in fileList :
+ print "-" , fname
+ print