Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.td | 2
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 225
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 24
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 11
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 25
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 67
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 71
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 3
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 69
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 24
-rw-r--r--  lib/Target/ARM/ARMScheduleA8.td | 34
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 35
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 26
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 43
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 4
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 40
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 2
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 51
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 8
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt | 2
-rw-r--r--  lib/Target/CellSPU/SPUAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 6
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 29
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 4
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 22
-rw-r--r--  lib/Target/MBlaze/CMakeLists.txt | 2
-rw-r--r--  lib/Target/MBlaze/MBlaze.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 8
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule.td | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 4
-rw-r--r--  lib/Target/MSP430/CMakeLists.txt | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 4
-rw-r--r--  lib/Target/Mips/AsmParser/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 15
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 32
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 23
-rw-r--r--  lib/Target/Mips/Mips.h | 1
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 27
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 97
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 47
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 52
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 58
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 54
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 61
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 36
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 155
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 8
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 70
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 60
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 26
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 213
-rw-r--r--  lib/Target/Mips/MipsJITInfo.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsJITInfo.h | 6
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 418
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 37
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 3
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 24
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 44
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 17
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 12
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 10
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 6
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt | 2
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 27
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2
-rw-r--r--  lib/Target/PowerPC/PPC.td | 19
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 213
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 53
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 93
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 74
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 3
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 146
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 26
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 1
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 12
-rw-r--r--  lib/Target/README.txt | 6
-rw-r--r--  lib/Target/Sparc/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Sparc/SparcAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 6
-rw-r--r--  lib/Target/TargetMachine.cpp | 56
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 75
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 2
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 35
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 14
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 7
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 6
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 41
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 99
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 91
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 16
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 1
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 70
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 6
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 6
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 586
-rw-r--r--  lib/Target/X86/X86InstrVMX.td | 8
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 60
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 8
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 16
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 14
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 10
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 2
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 2
126 files changed, 2944 insertions, 1597 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 10d7f56c7f..7af8b9d909 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
FeatureAvoidPartialCPSR]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, GenericItineraries, Features>;
+ : Processor<Name, NoItineraries, Features>;
// V4 Processors.
def : ProcNoItin<"generic", []>;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 967c0a8462..76cd0c389d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,8 +23,8 @@
#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
@@ -515,7 +515,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
case 'a': // Print as a memory address.
if (MI->getOperand(OpNum).isReg()) {
O << "["
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0811d226b4..08e55429ce 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1738,26 +1738,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if it has two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
- int &CmpValue) const {
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
case ARM::TSTri:
case ARM::t2TSTri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = MI->getOperand(1).getImm();
CmpValue = 0;
return true;
@@ -1795,21 +1802,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register. Convert
-/// the SUBrr(r1,r2)|Subri(r1,CmpValue) instruction into one that sets the flags
-/// register and remove the CMPrr(r1,r2)|CMPrr(r2,r1)|CMPri(r1,CmpValue)
-/// instruction.
-bool ARMBaseInstrInfo::
-OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+/// getSwappedCondition - Assuming the flags are set by MI(a,b), return the
+/// condition code to use if we instead modify the instructions so that the
+/// flags are set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
- return false;
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
- MachineInstr *MI = &*DI;
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
@@ -1840,13 +1893,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
MachineInstr *Sub = NULL;
- unsigned SrcReg2 = 0;
- if (CmpInstr->getOpcode() == ARM::CMPrr ||
- CmpInstr->getOpcode() == ARM::t2CMPrr) {
- SrcReg2 = CmpInstr->getOperand(1).getReg();
+ if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = NULL;
- } else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
@@ -1859,40 +1909,19 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
- return false;
- if (!MO.isReg()) continue;
-
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
- if (MO.getReg() == ARM::CPSR)
- return false;
- }
-
- // Check whether the current instruction is SUB(r1, r2) or SUB(r2, r1).
- if (SrcReg2 != 0 &&
- (Instr.getOpcode() == ARM::SUBrr ||
- Instr.getOpcode() == ARM::t2SUBrr) &&
- ((Instr.getOperand(1).getReg() == SrcReg &&
- Instr.getOperand(2).getReg() == SrcReg2) ||
- (Instr.getOperand(1).getReg() == SrcReg2 &&
- Instr.getOperand(2).getReg() == SrcReg))) {
- Sub = &*I;
- break;
- }
+ return false;
- // Check whether the current instruction is SUBri(r1, CmpValue).
- if ((CmpInstr->getOpcode() == ARM::CMPri ||
- CmpInstr->getOpcode() == ARM::t2CMPri) &&
- Instr.getOpcode() == ARM::SUBri && CmpValue != 0 &&
- Instr.getOperand(1).getReg() == SrcReg &&
- Instr.getOperand(2).getImm() == CmpValue) {
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
Sub = &*I;
break;
}
@@ -1950,7 +1979,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// CPSR use (i.e. used in another block), then it's not safe to perform
// the optimization.
// When checking against Sub, we handle the condition codes GE, LT, GT, LE.
- SmallVector<MachineOperand*, 4> OperandsToUpdate;
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
E = CmpInstr->getParent()->end();
@@ -1971,30 +2001,20 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
// Condition code is after the operand before CPSR.
ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
- if (Sub)
- switch (CC) {
- default:
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
return false;
- case ARMCC::GE:
- case ARMCC::LT:
- case ARMCC::GT:
- case ARMCC::LE:
- case ARMCC::HS:
- case ARMCC::LS:
- case ARMCC::HI:
- case ARMCC::LO:
- case ARMCC::EQ:
- case ARMCC::NE:
- // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
- // on CMP needs to be updated to be based on SUB.
- // Push the condition code operands to OperandsToUpdate.
- // If it is safe to remove CmpInstr, the condition code of these
- // operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg)
- OperandsToUpdate.push_back(&((*I).getOperand(IO-1)));
- break;
- }
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
+ NewCC));
+ }
else
switch (CC) {
default:
@@ -2024,26 +2044,9 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Modify the condition code of operands in OperandsToUpdate.
// Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
// be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
- for (unsigned i = 0; i < OperandsToUpdate.size(); i++) {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)OperandsToUpdate[i]->getImm();
- ARMCC::CondCodes NewCC;
- switch (CC) {
- default: llvm_unreachable("only expecting less/greater comparisons here");
- case ARMCC::GE: NewCC = ARMCC::LE; break;
- case ARMCC::LT: NewCC = ARMCC::GT; break;
- case ARMCC::GT: NewCC = ARMCC::LT; break;
- case ARMCC::LE: NewCC = ARMCC::GE; break;
- case ARMCC::HS: NewCC = ARMCC::LS; break;
- case ARMCC::LS: NewCC = ARMCC::HS; break;
- case ARMCC::HI: NewCC = ARMCC::LO; break;
- case ARMCC::LO: NewCC = ARMCC::HI; break;
- case ARMCC::EQ:
- case ARMCC::NE:
- NewCC = CC;
- break;
- }
- OperandsToUpdate[i]->setImm(NewCC);
- }
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+
return true;
}
}
@@ -2175,9 +2178,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps)
- return UOps;
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0)
+ return ItinUOps;
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2251,19 +2254,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- UOps = (NumRegs / 2);
+ int A8UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++UOps;
- return UOps;
+ ++A8UOps;
+ return A8UOps;
} else if (Subtarget.isCortexA9()) {
- UOps = (NumRegs / 2);
+ int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) ||
!MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
- ++UOps;
- return UOps;
+ ++A9UOps;
+ return A9UOps;
} else {
// Assume the worst.
return NumRegs;
@@ -2763,11 +2766,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned NewUseIdx;
const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
Reg, NewUseIdx, UseAdj);
- if (NewUseMI) {
- UseMI = NewUseMI;
- UseIdx = NewUseIdx;
- UseMCID = &UseMI->getDesc();
- }
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
}
if (Reg == ARM::CPSR) {
@@ -2795,6 +2799,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
@@ -3015,9 +3022,7 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
return 1;
// If the second MI is predicated, then there is an implicit use dependency.
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
- DepMI->getNumOperands());
- return (Latency <= 0) ? 1 : Latency;
+ return getInstrLatency(ItinData, DefMI);
}
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
@@ -3054,9 +3059,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
unsigned Class = MCID.getSchedClass();
// For instructions with variable uops, use uops as latency.
- if (!ItinData->isEmpty() && !ItinData->Itineraries[Class].NumMicroOps) {
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
return getNumMicroOps(ItinData, MI);
- }
+
// For the common case, fall back on the itinerary's latency.
unsigned Latency = ItinData->getStageLatency(Class);
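
Note: a hedged before/after sketch of the optimizeCompareInstr peephole above
(registers illustrative, not taken from the patch):

    @ before: SUB computes the value, a separate CMP sets the flags
    sub   r0, r1, r2      @ r0 = r1 - r2
    cmp   r2, r1          @ flags from r2 - r1 (operands swapped)
    movgt r3, #1          @ executes if r2 > r1

    @ after: SUBS sets the flags itself, the CMP is removed, and the
    @ dependent condition code is swapped (GT -> LT) via getSwappedCondition
    subs  r0, r1, r2      @ r0 = r1 - r2, flags from r1 - r2
    movlt r3, #1          @ r1 < r2, i.e. r2 > r1 under the swap
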
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 8217f239d1..1a10a4ab1c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -186,16 +186,20 @@ public:
return NumCycles == 1;
}
- /// AnalyzeCompare - For a comparison instruction, return the source register
- /// in SrcReg and the value it compares against in CmpValue. Return true if
- /// the comparison instruction can be analyzed.
- virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- int &CmpMask, int &CmpValue) const;
-
- /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
- /// that we can remove a "comparison with zero".
- virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- int CmpMask, int CmpValue,
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if it has two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const;
+
+ /// optimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero"; Remove a redundant CMP
+ /// instruction if the flags can be updated in the same way by an earlier
+ /// instruction such as SUB.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c386a01e89..3650e1fb77 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -471,22 +471,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0);
+ MIB.addReg(D0, getUndefRegState(SrcIsUndef));
if (NumRegs > 1 && TableEntry->copyAllListRegs)
- MIB.addReg(D1);
+ MIB.addReg(D1, getUndefRegState(SrcIsUndef));
if (NumRegs > 2 && TableEntry->copyAllListRegs)
- MIB.addReg(D2);
+ MIB.addReg(D2, getUndefRegState(SrcIsUndef));
if (NumRegs > 3 && TableEntry->copyAllListRegs)
- MIB.addReg(D3);
+ MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
- if (SrcIsKill) // Add an implicit kill for the super-reg.
+ if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 2158b7e028..ff660210ea 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -177,7 +177,6 @@ class ARMFastISel : public FastISel {
bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
-
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
@@ -1361,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(AddrReg));
- return true;
+ return true;
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -1740,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
// type and the target independent selector doesn't know how to handle it.
if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
-
+
unsigned Opc;
switch (ISDOpcode) {
default: return false;
@@ -2146,7 +2145,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return false;
// Can't handle non-double multi-reg retvals.
- if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
+ if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
@@ -2352,7 +2351,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MIB.addReg(CalleeReg);
else if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
- else
+ else
MIB.addExternalSymbol(IntrMemName, 0);
} else {
if (UseReg)
@@ -2365,7 +2364,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Explicitly adding the predicate here.
AddDefaultPred(MIB);
}
-
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@@ -2486,10 +2485,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return true;
}
}
-
+
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
return false;
@@ -2501,13 +2500,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// Don't handle volatile.
if (MSI.isVolatile())
return false;
-
+
if (!MSI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MSI.getDestAddressSpace() > 255)
return false;
-
+
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
@@ -2518,7 +2517,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
}
bool ARMFastISel::SelectTrunc(const Instruction *I) {
- // The high bits for a type smaller than the register size are assumed to be
+ // The high bits for a type smaller than the register size are assumed to be
// undefined.
Value *Op = I->getOperand(0);
@@ -2709,7 +2708,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
// See if we can handle this address.
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
-
+
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e1c89e0c42..238b79e1f1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -6561,11 +6561,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize;
+ unsigned ldrOpc, strOpc, UnitSize = 0;
const TargetRegisterClass *TRC = isThumb2 ?
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC_Vec = 0;
if (Align & 1) {
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
@@ -6576,10 +6577,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
- UnitSize = 4;
+ // Check whether we can use NEON instructions.
+ if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if ((Align % 16 == 0) && SizeVal >= 16) {
+ ldrOpc = ARM::VLD1q32wb_fixed;
+ strOpc = ARM::VST1q32wb_fixed;
+ UnitSize = 16;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
+ }
+ else if ((Align % 8 == 0) && SizeVal >= 8) {
+ ldrOpc = ARM::VLD1d32wb_fixed;
+ strOpc = ARM::VST1d32wb_fixed;
+ UnitSize = 8;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
+ }
+ }
+ // Can't use NEON instructions.
+ if (UnitSize == 0) {
+ ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ UnitSize = 4;
+ }
}
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -6590,10 +6611,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(destIn).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc), scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
@@ -6739,8 +6767,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
- unsigned scratch = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(destPhi).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
@@ -7113,9 +7147,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewMovDstReg = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
(const TargetRegisterClass*)&ARM::rGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass);
@@ -7132,12 +7163,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// fall through to SinkMBB
RSBBB->addSuccessor(SinkBB);
- // insert a movs at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
- NewMovDstReg)
- .addReg(ABSSrcReg, RegState::Kill)
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::CPSR, RegState::Define);
+ // insert a cmp at the end of BB
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg).addImm(0));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -7149,7 +7178,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(NewMovDstReg, RegState::Kill)
+ .addReg(ABSSrcReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
// insert PHI in SinkBB,
@@ -7157,7 +7186,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
.addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(NewMovDstReg).addMBB(BB);
+ .addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
MI->eraseFromParent();
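
Note: the EmitStructByval change above selects NEON post-increment loads and
stores for sufficiently aligned copies. A minimal sketch of the loop it now
emits, assuming a 16-byte-aligned copy (registers illustrative):

    loop:
      vld1.32 {d0, d1}, [r1]!   @ VLD1q32wb_fixed: load 16 bytes, bump src
      vst1.32 {d0, d1}, [r0]!   @ VST1q32wb_fixed: store 16 bytes, bump dst
      subs    r2, r2, #16       @ one UnitSize (16 bytes) per iteration
      bne     loop
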
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index b8f607eb4c..31b0c41f08 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
if (hasNOP()) {
- NopInst.setOpcode(ARM::NOP);
+ NopInst.setOpcode(ARM::HINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
} else {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 611d9194fd..6a14871bb0 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -244,7 +244,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast) && "
"!Subtarget->isTargetDarwin()">;
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "
"Subtarget->isTargetDarwin()">;
@@ -265,9 +266,9 @@ class RegConstraint<string C> {
// ARM specific transformation functions and pattern fragments.
//
-// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
-// so_imm_neg def below.
-def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+// imm_neg_XFORM - Return an imm value packed into the format described for
+// the imm_neg defs below.
+def imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
}]>;
@@ -286,7 +287,7 @@ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
int64_t Value = -(int)N->getZExtValue();
return Value && ARM_AM::getSOImmVal(Value) != -1;
- }], so_imm_neg_XFORM> {
+ }], imm_neg_XFORM> {
let ParserMatchClass = so_imm_neg_asmoperand;
}
@@ -599,7 +600,10 @@ def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
}
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
+def Imm0_15AsmOperand: ImmAsmOperand {
+ let Name = "Imm0_15";
+ let DiagnosticType = "ImmRange0_15";
+}
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
}]> {
@@ -644,6 +648,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_65535AsmOperand;
}
+// imm0_65535_neg - An immediate whose negation is in the range [0,65535].
+def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
+ return -Imm >= 0 && -Imm < 65536;
+}]>;
+
// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
// a relocatable expression.
//
@@ -1640,33 +1649,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
NoItinerary, []>;
}
-def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000000;
+def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+ "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
}
-def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000001;
-}
-
-def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000010;
-}
-
-def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000011;
-}
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1679,18 +1673,10 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
let Inst{27-20} = 0b01101000;
let Inst{7-4} = 0b1011;
let Inst{11-8} = 0b1111;
-
let Unpredictable{11-8} = 0b1111;
}
-def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
- []>, Requires<[IsARM, HasV6T2]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{15-8} = 0b11110000;
- let Inst{7-0} = 0b00000100;
-}
-
-// The i32imm operand $val can be used by a debugger to store more information
+// The 16-bit operand $val can be used by a debugger to store more information
// about the breakpoint.
def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
"bkpt", "\t$val", []>, Requires<[IsARM]> {
@@ -3243,6 +3229,11 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm),
(SUBSri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm),
+ (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm),
+ (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
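
Note: the new imm0_65535_neg patterns let an add of a negated constant that is
not a valid ARM modified-immediate, but whose negation fits in 16 bits, be
lowered to a MOVW plus a register-register subtract. A sketch (constant and
registers chosen for illustration):

    @ add r0, r0, #-1234  -- 1234 is not encodable as a so_imm
    movw  r1, #1234       @ MOVi16 materializes the negated constant
    sub   r0, r0, r1      @ SUBrr: r0 + (-1234) == r0 - 1234
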
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 66daa1cb69..fec61d2390 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -32,9 +32,6 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = ThumbSRImmAsmOperand;
}
-def imm_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
-}]>;
def imm_comp_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
}]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 58119baea5..7ea96772aa 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -62,6 +62,15 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32);
}]>;
+// t2_so_imm_notSext16_XFORM - Return a so_imm value packed into the format
+// described for the t2_so_imm_notSext def below, with sign extension from 16
+// bits.
+def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
+ APInt apIntN = N->getAPIntValue();
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32);
+}]>;
+
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
@@ -86,6 +95,17 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
+// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm
+// if the upper 16 bits are zero.
+def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
+ APInt apIntN = N->getAPIntValue();
+ if (!apIntN.isIntN(16)) return false;
+ unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue();
+ return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1;
+ }], t2_so_imm_notSext16_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
+
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
@@ -1911,11 +1931,16 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
(t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
+ (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+
let AddedComplexity = 1 in
def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm),
(t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm),
(t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm),
+ (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
@@ -1924,6 +1949,8 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR),
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR),
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
+ (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
// Select Bytes -- for disassembly only
@@ -2134,8 +2161,8 @@ defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31,
BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">;
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
-def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
- (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
@@ -2332,6 +2359,17 @@ let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 rGPR:$src), [{
+ return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// so_imm_notSext is needed instead of so_imm_not, as the value of imm will
+// match the sign-extended, not the original, bit width of $src.
+def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
+
+
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
(t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
@@ -3426,21 +3464,18 @@ let imod = 0, iflags = 0, M = 1 in
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-class T2I_hint<bits<8> op7_0, string opc, string asm>
- : T2I<(outs), (ins), NoItinerary, opc, asm, []> {
- let Inst{31-20} = 0xf3a;
- let Inst{19-16} = 0b1111;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
- let Inst{10-8} = 0b000;
- let Inst{7-0} = op7_0;
-}
-
-def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
-def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
-def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
-def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
-def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
+ bits<8> imm;
+ let Inst{31-8} = 0b111100111010111110000000;
+ let Inst{7-0} = imm;
+}
+
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
+def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
+def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
+def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
+def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
bits<4> opt;
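
Note: with the hint instructions folded into the single HINT/t2HINT encodings,
the old mnemonics survive as InstAliases. Equivalences, as an assembly sketch:

    nop         @ == hint #0
    yield       @ == hint #1
    wfe         @ == hint #2
    wfi         @ == hint #3
    sev         @ == hint #4
    nop.w       @ Thumb2 wide form, == hint.w #0 via t2HINT
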
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index dccbffa4c9..4e2cda433b 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1207,6 +1207,14 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
+
+// (fma x, y, (fneg z)) -> (vfnms z, x, y))
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
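
Note: these patterns map @llvm.fma with a negated addend onto VFNMS, which
negates the accumulator: Dd = Dn*Dm - Dd. A one-line sketch (registers
illustrative):

    @ t = fma(x, y, -z), with d0 = z, d1 = x, d2 = y:
    vfnms.f64 d0, d1, d2   @ d0 = d1*d2 - d0 = x*y - z
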
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 45486fd0b6..81d2fa37c2 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass;
def IIC_iLoad_d_i : InstrItinClass;
def IIC_iLoad_d_r : InstrItinClass;
def IIC_iLoad_d_ru : InstrItinClass;
-def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
-def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
-def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
-def IIC_iPop : InstrItinClass<0>; // micro-coded
-def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_m : InstrItinClass;
+def IIC_iLoad_mu : InstrItinClass;
+def IIC_iLoad_mBr : InstrItinClass;
+def IIC_iPop : InstrItinClass;
+def IIC_iPop_Br : InstrItinClass;
def IIC_iLoadiALU : InstrItinClass;
def IIC_iStore_i : InstrItinClass;
def IIC_iStore_r : InstrItinClass;
@@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass;
def IIC_iStore_d_i : InstrItinClass;
def IIC_iStore_d_r : InstrItinClass;
def IIC_iStore_d_ru : InstrItinClass;
-def IIC_iStore_m : InstrItinClass<0>; // micro-coded
-def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_iStore_m : InstrItinClass;
+def IIC_iStore_mu : InstrItinClass;
def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
@@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass;
def IIC_fpSQRT64 : InstrItinClass;
def IIC_fpLoad32 : InstrItinClass;
def IIC_fpLoad64 : InstrItinClass;
-def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
-def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_m : InstrItinClass;
+def IIC_fpLoad_mu : InstrItinClass;
def IIC_fpStore32 : InstrItinClass;
def IIC_fpStore64 : InstrItinClass;
-def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
-def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_m : InstrItinClass;
+def IIC_fpStore_mu : InstrItinClass;
def IIC_VLD1 : InstrItinClass;
def IIC_VLD1x2 : InstrItinClass;
def IIC_VLD1x3 : InstrItinClass;
@@ -258,8 +258,6 @@ def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
-def GenericItineraries : ProcessorItineraries<[], [], []>;
-
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index eb1083ca23..61de00a208 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -155,28 +155,30 @@ def CortexA8Itineraries : MultiIssueItineraries<
// Load multiple, def is the 5th operand. Pipeline 0 only.
// FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1, 3], [], -1>, // dynamic uops
//
// Load multiple + update, defs are the 1st and 5th operands.
InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
- InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 3], [], -1>, // dynamic uops
//
// Load multiple plus branch
InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
InstrStage<3, [A8_LSPipe]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
- [1, 2, 1, 1, 3]>,
+ [1, 2, 1, 1, 3], [], -1>, // dynamic uops
//
// Pop, def is the 3rd operand.
InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
- InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 3], [], -1>, // dynamic uops
//
// Pop + branch, def is the 3rd operand.
InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
InstrStage<3, [A8_LSPipe]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
- [1, 1, 3]>,
-
+ [1, 1, 3], [], -1>, // dynamic uops
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -231,12 +233,13 @@ def CortexA8Itineraries : MultiIssueItineraries<
// Store multiple. Pipeline 0 only.
// FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>]>,
+ InstrStage<2, [A8_LSPipe]>],
+ [], [], -1>, // dynamic uops
//
// Store multiple + update
InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_LSPipe]>], [2]>,
-
+ InstrStage<2, [A8_LSPipe]>],
+ [2], [], -1>, // dynamic uops
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
@@ -397,14 +400,16 @@ def CortexA8Itineraries : MultiIssueItineraries<
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 2], [], -1>, // dynamic uops
//
// FP Load Multiple + update
InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 2], [], -1>, // dynamic uops
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -423,15 +428,16 @@ def CortexA8Itineraries : MultiIssueItineraries<
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Store Multiple + update
InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe], 0>,
InstrStage<1, [A8_LSPipe]>,
InstrStage<1, [A8_NLSPipe], 0>,
- InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
-
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1], [], -1>, // dynamic uops
// NEON
// Issue through integer pipeline, and execute in NEON unit.
//
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index a00577bf3d..1677ba6a98 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -284,7 +284,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Load multiple + update, defs are the 1st and 5th operands.
InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -292,7 +293,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Load multiple plus branch
InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -301,7 +303,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<2, [A9_LSUnit]>,
InstrStage<1, [A9_Branch]>],
[1, 2, 1, 1, 3],
- [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Pop, def is the 3rd operand.
InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -309,7 +312,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<2, [A9_AGU], 1>,
InstrStage<2, [A9_LSUnit]>],
[1, 1, 3],
- [NoBypass, NoBypass, A9_LdBypass]>,
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// Pop + branch, def is the 3rd operand.
InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -318,8 +322,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<2, [A9_LSUnit]>,
InstrStage<1, [A9_Branch]>],
[1, 1, 3],
- [NoBypass, NoBypass, A9_LdBypass]>,
-
+ [NoBypass, NoBypass, A9_LdBypass],
+ -1>, // dynamic uops
//
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -413,14 +417,15 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
- InstrStage<2, [A9_LSUnit]>]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [], [], -1>, // dynamic uops
//
// Store multiple + update
InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_AGU], 0>,
- InstrStage<2, [A9_LSUnit]>], [2]>,
-
+ InstrStage<2, [A9_LSUnit]>],
+ [2], [], -1>, // dynamic uops
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
@@ -717,7 +722,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Load Multiple + update
// FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -726,7 +732,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -753,7 +760,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1], [], -1>, // dynamic uops
//
// FP Store Multiple + update
// FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -762,7 +770,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1], [], -1>, // dynamic uops
// NEON
// VLD1
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 99ed63293d..b12607b206 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -143,22 +143,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
- PM->add(createGlobalMergePass(TM->getTargetLowering()));
+ addPass(createGlobalMergePass(TM->getTargetLowering()));
return false;
}
bool ARMPassConfig::addInstSelector() {
- PM->add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+ addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
return false;
}
bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
- PM->add(createARMLoadStoreOptimizationPass(true));
+ addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
- PM->add(createMLxExpansionPass());
+ addPass(createMLxExpansionPass());
return true;
}
@@ -166,23 +166,23 @@ bool ARMPassConfig::addPreSched2() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None) {
if (!getARMSubtarget().isThumb1Only()) {
- PM->add(createARMLoadStoreOptimizationPass());
+ addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
if (getARMSubtarget().hasNEON())
- PM->add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
// proper scheduling.
- PM->add(createARMExpandPseudoPass());
+ addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
if (!getARMSubtarget().isThumb1Only())
- addPass(IfConverterID);
+ addPass(&IfConverterID);
}
if (getARMSubtarget().isThumb2())
- PM->add(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
return true;
}
@@ -190,10 +190,10 @@ bool ARMPassConfig::addPreSched2() {
bool ARMPassConfig::addPreEmitPass() {
if (getARMSubtarget().isThumb2()) {
if (!getARMSubtarget().prefers32BitThumb())
- PM->add(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
// Constant island pass work on unbundled instructions.
- addPass(UnpackMachineBundlesID);
+ addPass(&UnpackMachineBundlesID);
}
// @LOCALMOD-START
@@ -205,12 +205,12 @@ bool ARMPassConfig::addPreEmitPass() {
}
// @LOCALMOD-END
- PM->add(createARMConstantIslandPass());
+ addPass(createARMConstantIslandPass());
// @LOCALMOD-START
// This pass does all the heavy sfi lifting.
if (getARMSubtarget().isTargetNaCl()) {
- PM->add(createARMNaClRewritePass());
+ addPass(createARMNaClRewritePass());
}
// @LOCALMOD-END
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 00c495b89a..22db332f2b 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -24,20 +24,11 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
+ bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
+ InitializeELF(isAAPCS_ABI);
if (isAAPCS_ABI) {
- StaticCtorSection =
- getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- StaticDtorSection =
- getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getDataRel());
//LSDASection = NULL;
}
@@ -47,33 +38,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
-
-const MCSection *
-ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const {
- if (!isAAPCS_ABI)
- return TargetLoweringObjectFileELF::getStaticCtorSection(Priority);
-
- if (Priority == 65535)
- return StaticCtorSection;
-
- // Emit ctors in priority order.
- std::string Name = std::string(".init_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
-}
-
-const MCSection *
-ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const {
- if (!isAAPCS_ABI)
- return TargetLoweringObjectFileELF::getStaticDtorSection(Priority);
-
- if (Priority == 65535)
- return StaticDtorSection;
-
- // Emit dtors in priority order.
- std::string Name = std::string(".fini_array.") + utostr(Priority);
- return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
- ELF::SHF_ALLOC | ELF::SHF_WRITE,
- SectionKind::getDataRel());
-}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index ff21060414..c6a7261439 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -20,7 +20,6 @@ class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
- bool isAAPCS_ABI;
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
@@ -32,9 +31,6 @@ public:
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
-
- const MCSection * getStaticCtorSection(unsigned Priority) const;
- const MCSection * getStaticDtorSection(unsigned Priority) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2fae489371..68f128189f 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -236,7 +236,10 @@ public:
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
Match_RequiresNotITBlock,
Match_RequiresV6,
- Match_RequiresThumb2
+ Match_RequiresThumb2,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARMGenAsmMatcher.inc"
+
};
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
@@ -3253,10 +3256,11 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
StringRef OptStr = Tok.getString();
- unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
+ unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
.Case("sy", ARM_MB::SY)
.Case("st", ARM_MB::ST)
.Case("sh", ARM_MB::ISH)
@@ -3284,7 +3288,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
StringRef IFlagsStr = Tok.getString();
// An iflags string of "none" is interpreted to mean that none of the AIF
@@ -3353,22 +3358,22 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
.Case("xpsr_nzcvq", 0x803)
.Case("xpsr_g", 0x403)
.Case("xpsr_nzcvqg", 0xc03)
- .Case("ipsr", 5)
- .Case("epsr", 6)
- .Case("iepsr", 7)
- .Case("msp", 8)
- .Case("psp", 9)
- .Case("primask", 16)
- .Case("basepri", 17)
- .Case("basepri_max", 18)
- .Case("faultmask", 19)
- .Case("control", 20)
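+    // M-class values now pack the 2-bit APSR transfer mask into bits
+    // [11:10] and keep the SYSm number in bits [7:0] (e.g. 0x805 = ipsr).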
+ .Case("ipsr", 0x805)
+ .Case("epsr", 0x806)
+ .Case("iepsr", 0x807)
+ .Case("msp", 0x808)
+ .Case("psp", 0x809)
+ .Case("primask", 0x810)
+ .Case("basepri", 0x811)
+ .Case("basepri_max", 0x812)
+ .Case("faultmask", 0x813)
+ .Case("control", 0x814)
.Default(~0U);
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
- if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+ if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
@@ -7410,6 +7415,11 @@ MatchAndEmitInstruction(SMLoc IDLoc,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_15: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ }
}
llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 92c5d92ff7..bf74a9df3b 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -51,6 +51,8 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
+add_dependencies(LLVMARMCodeGen intrinsics_gen)
+
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 9eda04d776..e97f4c7430 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -52,6 +52,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
unsigned Opcode = MI->getOpcode();
+ // Check for HINT instructions w/ canonical names.
+ if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ switch (MI->getOperand(0).getImm()) {
+ case 0: O << "\tnop"; break;
+ case 1: O << "\tyield"; break;
+ case 2: O << "\twfe"; break;
+ case 3: O << "\twfi"; break;
+ case 4: O << "\tsev"; break;
+ default:
+ // Anything else should just print normally.
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+ printPredicateOperand(MI, 1, O);
+ if (Opcode == ARM::t2HINT)
+ O << ".w";
+ printAnnotation(O, Annot);
+ return;
+ }
+
// Check for MOVs and print canonical forms, instead.
if (Opcode == ARM::MOVsr) {
// FIXME: Thumb variants?
@@ -736,16 +757,26 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
case 0x403: O << "xpsr_g"; return;
case 0xc03: O << "xpsr_nzcvqg"; return;
- case 5: O << "ipsr"; return;
- case 6: O << "epsr"; return;
- case 7: O << "iepsr"; return;
- case 8: O << "msp"; return;
- case 9: O << "psp"; return;
- case 16: O << "primask"; return;
- case 17: O << "basepri"; return;
- case 18: O << "basepri_max"; return;
- case 19: O << "faultmask"; return;
- case 20: O << "control"; return;
+ case 5:
+ case 0x805: O << "ipsr"; return;
+ case 6:
+ case 0x806: O << "epsr"; return;
+ case 7:
+ case 0x807: O << "iepsr"; return;
+ case 8:
+ case 0x808: O << "msp"; return;
+ case 9:
+ case 0x809: O << "psp"; return;
+ case 0x10:
+ case 0x810: O << "primask"; return;
+ case 0x11:
+ case 0x811: O << "basepri"; return;
+ case 0x12:
+ case 0x812: O << "basepri_max"; return;
+ case 0x13:
+ case 0x813: O << "faultmask"; return;
+ case 0x14:
+ case 0x814: O << "control"; return;
}
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 8dee1b1d6a..4d922d9b44 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -179,9 +179,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
break;
}
break;
- case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_blx:
- case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
switch (Modifier) {
case MCSymbolRefExpr::VK_ARM_PLT:
Type = ELF::R_ARM_PLT32;
@@ -193,6 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
break;
case ARM::fixup_arm_condbl:
case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
@@ -253,10 +253,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_arm_thumb_cp:
case ARM::fixup_arm_thumb_br:
llvm_unreachable("Unimplemented");
- case ARM::fixup_arm_uncondbranch:
- Type = ELF::R_ARM_CALL;
- break;
case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index cf4f796ec2..1f8ca8681c 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -24,5 +24,7 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
+add_dependencies(LLVMCellSPUCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 14021fef05..03d5a9ae0c 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -301,7 +301,9 @@ bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'L': // Write second word of DImode reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNo).isReg() ||
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3b90261fe6..54764f133c 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -72,7 +72,7 @@ TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
bool SPUPassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createSPUISelDag(getSPUTargetMachine()));
+ addPass(createSPUISelDag(getSPUTargetMachine()));
return false;
}
@@ -85,9 +85,9 @@ bool SPUPassConfig::addPreEmitPass() {
(BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
"createTCESchedulerPass");
if (schedulerCreator != NULL)
- PM->add(schedulerCreator("cellspu"));
+ addPass(schedulerCreator("cellspu"));
//align instructions with nops/lnops for dual issue
- PM->add(createSPUNopFillerPass(getSPUTargetMachine()));
+ addPass(createSPUNopFillerPass(getSPUTargetMachine()));
return true;
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index cd2ebcb508..c8e757becc 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -130,6 +130,7 @@ namespace {
private:
void printLinkageType(GlobalValue::LinkageTypes LT);
void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM);
void printCallingConv(CallingConv::ID cc);
void printEscapedString(const std::string& str);
void printCFP(const ConstantFP* CFP);
@@ -325,6 +326,26 @@ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
}
}
+void CppWriter::printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM) {
+ switch (TLM) {
+ case GlobalVariable::NotThreadLocal:
+ Out << "GlobalVariable::NotThreadLocal";
+ break;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ Out << "GlobalVariable::GeneralDynamicTLSModel";
+ break;
+ case GlobalVariable::LocalDynamicTLSModel:
+ Out << "GlobalVariable::LocalDynamicTLSModel";
+ break;
+ case GlobalVariable::InitialExecTLSModel:
+ Out << "GlobalVariable::InitialExecTLSModel";
+ break;
+ case GlobalVariable::LocalExecTLSModel:
+ Out << "GlobalVariable::LocalExecTLSModel";
+ break;
+ }
+}
+
// printEscapedString - Print each character of the specified string, escaping
// it if it is not printable or if it is an escape char.
void CppWriter::printEscapedString(const std::string &Str) {
@@ -996,7 +1017,9 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) {
}
if (GV->isThreadLocal()) {
printCppName(GV);
- Out << "->setThreadLocal(true);";
+ Out << "->setThreadLocalMode(";
+ printThreadLocalMode(GV->getThreadLocalMode());
+ Out << ");";
nl(Out);
}
if (is_inline) {
@@ -2078,7 +2101,9 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- bool DisableVerify) {
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
return false;
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 92bca6c3c7..9cbe7981a9 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -31,7 +31,9 @@ struct CPPTargetMachine : public TargetMachine {
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- bool DisableVerify);
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 3db17484b0..1f2d8accbb 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -32,6 +32,8 @@ add_llvm_target(HexagonCodeGen
HexagonNewValueJump.cpp
)
+add_dependencies(LLVMHexagonCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 0dc243f2b8..5fa4740f2a 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -133,7 +133,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
case 'c': // Don't print "$" before a global var name or constant.
// Hexagon never has a prefix.
printOperand(MI, OpNo, OS);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 7de27f74e2..a7b291ff2a 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -102,47 +102,47 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool HexagonPassConfig::addInstSelector() {
- PM->add(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
- PM->add(createHexagonISelDag(getHexagonTargetMachine()));
- PM->add(createHexagonPeephole());
+ addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+ addPass(createHexagonISelDag(getHexagonTargetMachine()));
+ addPass(createHexagonPeephole());
return false;
}
bool HexagonPassConfig::addPreRegAlloc() {
if (!DisableHardwareLoops) {
- PM->add(createHexagonHardwareLoops());
+ addPass(createHexagonHardwareLoops());
}
return false;
}
bool HexagonPassConfig::addPostRegAlloc() {
- PM->add(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
return true;
}
bool HexagonPassConfig::addPreSched2() {
- addPass(IfConverterID);
+ addPass(&IfConverterID);
return true;
}
bool HexagonPassConfig::addPreEmitPass() {
if (!DisableHardwareLoops) {
- PM->add(createHexagonFixupHwLoops());
+ addPass(createHexagonFixupHwLoops());
}
- PM->add(createHexagonNewValueJump());
+ addPass(createHexagonNewValueJump());
// Expand Spill code for predicate registers.
- PM->add(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+ addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
// Split up TFRcondsets into conditional transfers.
- PM->add(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+ addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
// Create Packets.
- PM->add(createHexagonPacketizer());
+ addPass(createHexagonPacketizer());
return false;
}
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index bf1deef491..6c3e8b6447 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -30,6 +30,8 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
+add_dependencies(LLVMMBlazeCodeGen intrinsics_gen)
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index b4edff0709..c2888553c5 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -50,7 +50,7 @@ def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
// MBlaze processors supported.
//===----------------------------------------------------------------------===//
-def : Processor<"mblaze", MBlazeGenericItineraries, []>;
+def : Processor<"mblaze", NoItineraries, []>;
def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 7269697ac2..e9f340f2f6 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -200,7 +200,13 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+    }
+  }
printOperand(MI, OpNo, O);
return false;
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 4a3ae5fc14..cd5691ce64 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -40,11 +40,6 @@ def IIC_WDC : InstrItinClass;
def IIC_Pseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
-// MBlaze generic instruction itineraries.
-//===----------------------------------------------------------------------===//
-def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
-
-//===----------------------------------------------------------------------===//
// MBlaze instruction itineraries for three stage pipeline.
//===----------------------------------------------------------------------===//
include "MBlazeSchedule3.td"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 62393d0920..5f82f14203 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -68,7 +68,7 @@ TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) {
// Install an instruction selector pass using
// the ISelDag to gen MBlaze code.
bool MBlazePassConfig::addInstSelector() {
- PM->add(createMBlazeISelDag(getMBlazeTargetMachine()));
+ addPass(createMBlazeISelDag(getMBlazeTargetMachine()));
return false;
}
@@ -76,6 +76,6 @@ bool MBlazePassConfig::addInstSelector() {
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
bool MBlazePassConfig::addPreEmitPass() {
- PM->add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
+ addPass(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
return true;
}
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index a8f9b52746..f9ecaed83a 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -23,6 +23,8 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
+add_dependencies(LLVMMSP430CodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 3acf96bb7d..817001d6ad 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -60,12 +60,12 @@ TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
bool MSP430PassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
+ addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
return false;
}
bool MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- PM->add(createMSP430BranchSelectionPass());
+ addPass(createMSP430BranchSelectionPass());
return false;
}
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
index ac21c259fb..6c7343bbe5 100644
--- a/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,6 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMMipsAsmParser
MipsAsmParser.cpp
)
+add_dependencies(LLVMMipsAsmParser MipsCommonTableGen)
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index bccb5099ef..e9a228c331 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(MipsCodeGen
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
MipsFrameLowering.cpp
+ MipsLongBranch.cpp
MipsMCInstLower.cpp
MipsMachineFunction.cpp
MipsRegisterInfo.cpp
@@ -31,6 +32,8 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
+add_dependencies(LLVMMipsCodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(Disassembler)
add_subdirectory(TargetInfo)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index b8fe772544..9c5d31e21c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -37,6 +37,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_GPRel_4:
case FK_Data_4:
case Mips::fixup_Mips_LO16:
+ case Mips::fixup_Mips_GPOFF_HI:
+ case Mips::fixup_Mips_GPOFF_LO:
+ case Mips::fixup_Mips_GOT_PAGE:
+ case Mips::fixup_Mips_GOT_OFST:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -75,10 +79,8 @@ public:
:MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- // @LOCALMOD-BEGIN-UPSTREAM
return createMipsELFObjectWriter(OS,
MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
- // @LOCALMOD-END-UPSTREAM
}
/// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
@@ -119,7 +121,8 @@ public:
CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
}
- uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
+ uint64_t Mask = ((uint64_t)(-1) >>
+ (64 - getFixupKindInfo(Kind).TargetSize));
CurVal |= Value & Mask;
// Write out the fixed up bytes back to the code/data bits.
@@ -160,7 +163,11 @@ public:
{ "fixup_Mips_TLSLDM", 0, 16, 0 },
{ "fixup_Mips_DTPREL_HI", 0, 16, 0 },
{ "fixup_Mips_DTPREL_LO", 0, 16, 0 },
- { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }
+ { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_GPOFF_HI", 0, 16, 0 },
+ { "fixup_Mips_GPOFF_LO", 0, 16, 0 },
+ { "fixup_Mips_GOT_PAGE", 0, 16, 0 },
+ { "fixup_Mips_GOT_OFST", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 2091bec500..9f9272886e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -34,7 +34,7 @@ namespace {
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
- MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI);
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64);
virtual ~MipsELFObjectWriter();
@@ -52,9 +52,11 @@ namespace {
};
}
-MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI)
+MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+ bool _isN64)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
- /*HasRelocationAddend*/ false) {}
+ /*HasRelocationAddend*/ false,
+ /*IsN64*/ _isN64) {}
MipsELFObjectWriter::~MipsELFObjectWriter() {}
@@ -148,8 +150,23 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_PC16:
Type = ELF::R_MIPS_PC16;
break;
+ case Mips::fixup_Mips_GOT_PAGE:
+ Type = ELF::R_MIPS_GOT_PAGE;
+ break;
+ case Mips::fixup_Mips_GOT_OFST:
+ Type = ELF::R_MIPS_GOT_OFST;
+ break;
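+  // The GPOFF fixups lower to composed N64 relocations: three relocation
+  // types packed into a single entry and applied in sequence.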
+ case Mips::fixup_Mips_GPOFF_HI:
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type);
+ break;
+ case Mips::fixup_Mips_GPOFF_LO:
+ Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+ Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+ Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
+ break;
}
-
return Type;
}
@@ -184,10 +201,10 @@ static int CompareOffset(const RelEntry &R0, const RelEntry &R1) {
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
- // Call the defualt function first. Relocations are sorted in descending
+ // Call the default function first. Relocations are sorted in descending
// order of r_offset.
MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
-
+
RelLs RelocLs;
std::vector<RelLsIter> Unmatched;
@@ -244,6 +261,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
- MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI);
+ MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
+ (Is64Bit) ? true : false);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 9b76eda861..1f6000cc8c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -95,6 +95,18 @@ namespace Mips {
// PC relative branch fixup resulting in - R_MIPS_PC16
fixup_Mips_Branch_PCRel,
+ // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+ fixup_Mips_GPOFF_HI,
+
+ // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+ fixup_Mips_GPOFF_LO,
+
+  // resulting in - R_MIPS_GOT_PAGE
+ fixup_Mips_GOT_PAGE,
+
+ // resulting in - R_MIPS_GOT_OFST
+ fixup_Mips_GOT_OFST,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 3b0e59b87a..8ab2edeca0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -187,7 +187,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
} else if (MO.isFPImm()) {
return static_cast<unsigned>(APFloat(MO.getFPImm())
.bitcastToAPInt().getHiBits(32).getLimitedValue());
- }
+ }
// MO must be an Expr.
assert(MO.isExpr());
@@ -201,10 +201,27 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
assert (Kind == MCExpr::SymbolRef);
-
+
Mips::Fixups FixupKind = Mips::Fixups(0);
switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+  case MCSymbolRefExpr::VK_Mips_GOT_DISP:
+    llvm_unreachable("fixup kind VK_Mips_GOT_DISP not supported for direct "
+                     "object!");
+  case MCSymbolRefExpr::VK_Mips_GPOFF_HI:
+    FixupKind = Mips::fixup_Mips_GPOFF_HI;
+    break;
+  case MCSymbolRefExpr::VK_Mips_GPOFF_LO:
+    FixupKind = Mips::fixup_Mips_GPOFF_LO;
+    break;
+  case MCSymbolRefExpr::VK_Mips_GOT_PAGE:
+    FixupKind = Mips::fixup_Mips_GOT_PAGE;
+    break;
+  case MCSymbolRefExpr::VK_Mips_GOT_OFST:
+    FixupKind = Mips::fixup_Mips_GOT_OFST;
+    break;
case MCSymbolRefExpr::VK_Mips_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
@@ -244,8 +261,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
FixupKind = Mips::fixup_Mips_TPREL_LO;
break;
- default:
- break;
} // switch
Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index ed61b642fc..411030aaa1 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -34,6 +34,7 @@ namespace llvm {
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index fc530939ed..2e0239377d 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -11,19 +11,29 @@
//
//===----------------------------------------------------------------------===//
+class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
+ let Predicates = [InMips16Mode];
+}
+
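+// li (extended): load an unsigned 16-bit immediate into a Mips16 register.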
+def LI16E : FEXT_RI16<0b01101, (outs CPU16Regs:$rx),
+ (ins uimm16:$amt),
+ !strconcat("li", "\t$rx, $amt"),
+                     [(set CPU16Regs:$rx, immZExt16:$amt)], IILoad>;
+
let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1,
isBarrier=1, hasCtrlDep=1, rx=0, nd=0, l=0, ra=0 in
-def RET16 : FRR16_JALRC < (outs), (ins CPURAReg:$target),
- "jr\t$target", [(MipsRet CPURAReg:$target)], IIBranch>;
+def RET16 : FRR16_JALRC<(outs), (ins CPURAReg:$target), "jr\t$target",
+                        [(MipsRet CPURAReg:$target)], IIBranch>;
// As stack alignment is always done with addiu, we need a 16-bit immediate
let Defs = [SP], Uses = [SP] in {
def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt),
- "!ADJCALLSTACKDOWN $amt",
- [(callseq_start timm:$amt)]>;
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2),
- "!ADJCALLSTACKUP $amt1",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -31,4 +41,7 @@ def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2),
let isCall=1, hasDelaySlot=1, nd=0, l=0, ra=0 in
def JumpLinkReg16:
FRR16_JALRC<(outs), (ins CPU16Regs:$rs, variable_ops),
- "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+ "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+
+// Small immediates
+def : Mips16Pat<(i32 immZExt16:$in), (LI16E immZExt16:$in)>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 7e129b8b8d..a5a3038827 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -230,47 +230,49 @@ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
// extended loads
let Predicates = [NotN64, HasStandardEncoding] in {
- def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
- def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
- def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
- def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
- def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
- def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
- def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
+ def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
+ def : MipsPat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
}
let Predicates = [IsN64, HasStandardEncoding] in {
- def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
- def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
- def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
+ def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
+ def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
+ def : MipsPat<(zextloadi32_u addr:$a),
+ (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
}
// hi/lo relocs
-def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
-def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
-def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
-def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
-def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
-
-def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
-def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
-def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
-
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
- (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
- (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
- (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
- (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+ (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+ (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+ (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+ (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
@@ -290,21 +292,22 @@ defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
// select MipsDynAlloc
-def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
- Requires<[IsN64, HasStandardEncoding]>;
+def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
+ Requires<[IsN64, HasStandardEncoding]>;
// truncate
-def : Pat<(i32 (trunc CPU64Regs:$src)),
- (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
- Requires<[IsN64, HasStandardEncoding]>;
+def : MipsPat<(i32 (trunc CPU64Regs:$src)),
+ (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
+ Requires<[IsN64, HasStandardEncoding]>;
// 32-to-64-bit extension
-def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
-def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
-def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : MipsPat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
// Sign extend in register
-def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
+ (SLL64_64 CPU64Regs:$src)>;
-// bswap pattern
-def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+// bswap pattern
+def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index b09c51179a..7167190f21 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -18,12 +18,12 @@
#include "MipsInstrInfo.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -58,9 +58,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- MCInst TmpInst0;
- MCInstLowering.Lower(MI, TmpInst0);
- OutStreamer.EmitInstruction(TmpInst0);
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
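+  // Lower and emit MI and, when it starts a bundle, each instruction
+  // bundled after it.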
+ do {
+ MCInst TmpInst0;
+ MCInstLowering.Lower(I++, TmpInst0);
+ OutStreamer.EmitInstruction(TmpInst0);
+ } while ((I != E) && I->isInsideBundle());
}
//===----------------------------------------------------------------------===//
@@ -236,15 +241,6 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
if (MipsFI->getEmitNOAT())
OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
}
-
- if ((MF->getTarget().getRelocationModel() == Reloc::PIC_) &&
- Subtarget->isABI_O32() && MipsFI->globalBaseRegSet()) {
- SmallVector<MCInst, 4> MCInsts;
- MCInstLowering.LowerSETGP01(MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
- I != MCInsts.end(); ++I)
- OutStreamer.EmitInstruction(*I);
- }
}
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
@@ -316,7 +312,8 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
switch (ExtraCode[0]) {
default:
- return true; // Unknown modifier.
+ // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
case 'X': // hex const int
if ((MO.getType()) != MachineOperand::MO_Immediate)
return true;
@@ -337,6 +334,17 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return true;
O << MO.getImm() - 1;
return false;
+ case 'z': {
+ // $0 if zero, regular printing otherwise
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ int64_t Val = MO.getImm();
+ if (Val)
+ O << Val;
+ else
+ O << "$0";
+ return false;
+ }
}
}
@@ -349,11 +357,12 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
+ return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+
return false;
}
@@ -401,7 +410,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_BlockAddress: {
- MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
O << BA->getName();
break;
}
@@ -462,7 +471,7 @@ printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
void MipsAsmPrinter::
printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier) {
- const MachineOperand& MO = MI->getOperand(opNum);
+ const MachineOperand &MO = MI->getOperand(opNum);
O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 4b7e1d3766..8aadefdcd1 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,58 @@ def RetCC_MipsEABI : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// Mips FastCC Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_MipsO32_FastCC : CallingConv<[
+  // f64 arguments are passed in double-precision floating-point registers.
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+
+ // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_MipsN_FastCC : CallingConv<[
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64,
+ T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
+ T8_64, V1_64]>>,
+
+  // f64 arguments are passed in double-precision floating-point registers.
+ CCIfType<[f64], CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, D4_64, D5_64,
+ D6_64, D7_64, D8_64, D9_64, D10_64, D11_64,
+ D12_64, D13_64, D14_64, D15_64, D16_64, D17_64,
+ D18_64, D19_64]>>,
+
+ // Stack parameter slots for i64 and f64 are 64-bit doublewords and
+ // 8-byte aligned.
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_Mips_FastCC : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers. All scratch registers,
+ // except for AT, V0 and T9, are available to be used as argument registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6,
+ T7, T8, V1]>>,
+
+  // f32 arguments are passed in single-precision floating-point registers.
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
+ F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+
+ // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+ CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FastCC>>,
+ CCDelegateTo<CC_MipsN_FastCC>
+]>;
+
+//===----------------------------------------------------------------------===//
// Mips Calling Convention Dispatch
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 7d819026da..c0e76399fb 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -145,8 +145,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
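+    // Walk instr_iterators so instructions inside bundles are emitted too.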
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I)
+ for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+ E = MBB->instr_end(); I != E; ++I)
emitInstruction(*I);
}
} while (MCE.finishFunction(MF));
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 8b1215ab1e..b12b1f2b5a 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -61,47 +61,54 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
Instruction MOVZInst, Instruction SLTOp,
Instruction SLTuOp, Instruction SLTiOp,
Instruction SLTiuOp> {
- def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
- def : Pat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
- def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : MipsPat<
+ (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
}
multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
Instruction MOVZInst, Instruction XOROp> {
- def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
- (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+ def : MipsPat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
}
multiclass MovzPats2<RegisterClass CRC, RegisterClass DRC,
Instruction MOVZInst, Instruction XORiOp> {
- def : Pat<(select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
+ def : MipsPat<
+ (select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
(MOVZInst DRC:$T, (XORiOp CRC:$lhs, immZExt16:$uimm16), DRC:$F)>;
}
multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
Instruction XOROp> {
- def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
- (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
- def : Pat<(select CRC:$cond, DRC:$T, DRC:$F),
- (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
- def : Pat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
- (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+ def : MipsPat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : MipsPat<(select CRC:$cond, DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+  def : MipsPat<(select (i32 (setne CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
}
// Instantiation of instructions.
def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
-let Predicates = [HasMips64, HasStandardEncoding],DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
let isCodeGenOnly = 1;
@@ -139,7 +146,8 @@ let Predicates = [NotFP64bit, HasStandardEncoding] in {
def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
}
-let Predicates = [IsFP64bit, HasStandardEncoding],DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding],
+ DecoderNamespace = "Mips64" in {
def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
let isCodeGenOnly = 1;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index a8c4b05ecd..2bba8a3802 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -45,10 +45,12 @@ static cl::opt<bool> SkipDelaySlotFiller(
namespace {
struct Filler : public MachineFunctionPass {
+ typedef MachineBasicBlock::instr_iterator InstrIter;
+ typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
TargetMachine &TM;
const TargetInstrInfo *TII;
- MachineBasicBlock::iterator LastFiller;
+ InstrIter LastFiller;
static char ID;
Filler(TargetMachine &tm)
@@ -71,27 +73,27 @@ namespace {
}
bool isDelayFiller(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator candidate);
+ InstrIter candidate);
- void insertCallUses(MachineBasicBlock::iterator MI,
- SmallSet<unsigned, 32>& RegDefs,
- SmallSet<unsigned, 32>& RegUses);
+ void insertCallUses(InstrIter MI,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
- void insertDefsUses(MachineBasicBlock::iterator MI,
- SmallSet<unsigned, 32>& RegDefs,
- SmallSet<unsigned, 32>& RegUses);
+ void insertDefsUses(InstrIter MI,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
- bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
unsigned Reg);
- bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool delayHasHazard(InstrIter candidate,
bool &sawLoad, bool &sawStore,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses);
bool
- findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
- MachineBasicBlock::iterator &Filler);
+ findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
+ InstrIter &Filler);
};
@@ -103,14 +105,14 @@ namespace {
bool Filler::
runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- LastFiller = MBB.end();
+ LastFiller = MBB.instr_end();
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I)
if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
- MachineBasicBlock::iterator D;
+ InstrIter D;
if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
MBB.splice(llvm::next(I), &MBB, D);
@@ -121,6 +123,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Record the filler instruction that filled the delay slot.
// The instruction after it will be visited in the next iteration.
LastFiller = ++I;
+
+ // Set InsideBundle bit so that the machine verifier doesn't expect this
+ // instruction to be a terminator.
+ LastFiller->setIsInsideBundle();
}
return Changed;
@@ -133,8 +139,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
}
bool Filler::findDelayInstr(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator slot,
- MachineBasicBlock::iterator &Filler) {
+ InstrIter slot,
+ InstrIter &Filler) {
SmallSet<unsigned, 32> RegDefs;
SmallSet<unsigned, 32> RegUses;
@@ -143,13 +149,13 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
bool sawLoad = false;
bool sawStore = false;
- for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) {
+ for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) {
// skip debug value
if (I->isDebugValue())
continue;
// Convert to forward iterator.
- MachineBasicBlock::iterator FI(llvm::next(I).base());
+ InstrIter FI(llvm::next(I).base());
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
@@ -175,7 +181,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
return false;
}
-bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+bool Filler::delayHasHazard(InstrIter candidate,
bool &sawLoad, bool &sawStore,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses) {
@@ -223,9 +229,9 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
}
// Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
- SmallSet<unsigned, 32>& RegDefs,
- SmallSet<unsigned, 32>& RegUses) {
+void Filler::insertDefsUses(InstrIter MI,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
@@ -250,7 +256,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
}
//returns true if the Reg or its alias is in the RegSet.
-bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
+bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
// Check Reg and all aliased Registers.
for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
AI.isValid(); ++AI)
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 27609c13ea..5afd2fc576 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -94,38 +94,6 @@ bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
return true;
}
-// Build an instruction sequence to load an immediate that is too large to fit
-// in 16-bit and add the result to Reg.
-static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64,
- const MipsInstrInfo &TII, MachineBasicBlock& MBB,
- MachineBasicBlock::iterator II, DebugLoc DL) {
- unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
- unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu;
- unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
- unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
- MipsAnalyzeImmediate AnalyzeImm;
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */);
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
-
- // The first instruction can be a LUi, which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == LUi)
- BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
- else
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- // Build the remaining instructions in Seq.
- for (++Inst; Inst != Seq.end(); ++Inst)
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg);
-}
-
void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -144,9 +112,12 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// First, compute final stack size.
unsigned StackAlign = getStackAlignment();
- uint64_t StackSize =
- RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign) +
- RoundUpToAlignment(MFI->getStackSize(), StackAlign);
+ uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign);
+
+ if (MipsFI->globalBaseRegSet())
+ StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign;
+ else
+ StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign);
// Update stack size
MFI->setStackSize(StackSize);
@@ -162,8 +133,12 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
  if (isInt<16>(-StackSize)) // addiu sp, sp, (-stacksize)
BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
  else { // Expand immediate that doesn't fit into 16 bits.
- MipsFI->setEmitNOAT();
- expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
+ unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+ MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
+ 0);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
}
// emit ".cfi_def_cfa_offset StackSize"
@@ -264,14 +239,20 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
// Adjust stack.
  if (isInt<16>(StackSize)) // addiu sp, sp, (stacksize)
BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
- else // Expand immediate that doesn't fit in 16-bit.
- expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
+  else { // Expand immediate that doesn't fit into 16 bits.
+ unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+ MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
+ 0);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
+ }
}
void MipsFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
- MachineRegisterInfo& MRI = MF.getRegInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// FIXME: remove this code if register allocator can correctly mark
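The revised stack-size computation above hinges on RoundUpToAlignment. A quick sketch of that rounding and of the prologue arithmetic (the helper mirrors the usual llvm/Support/MathExtras.h definition; the concrete sizes are made up for illustration):

  #include <cassert>
  #include <cstdint>

  // Smallest multiple of Align that is >= Value (Align must be non-zero).
  static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
    return (Value + Align - 1) / Align * Align;
  }

  int main() {
    // E.g. 41 bytes of locals and a 24-byte max call frame at 8-byte
    // alignment, in the branch without a global-reg spill slot:
    assert(roundUpToAlignment(41, 8) == 48);
    assert(roundUpToAlignment(41, 8) + roundUpToAlignment(24, 8) == 72);
  }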
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index b1ac73579f..f1c672ad86 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -117,15 +117,22 @@ private:
void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- if (!MipsFI->globalBaseRegSet())
+ if (((MF.getTarget().getRelocationModel() == Reloc::Static) ||
+ Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet())
return;
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo();
+ const MipsInstrInfo *MII = TM.getInstrInfo();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ int FI = 0;
+
+ if (!Subtarget.inMips16Mode())
+    FI = MipsFI->initGlobalRegFI();
const TargetRegisterClass *RC = Subtarget.isABI_N64() ?
(const TargetRegisterClass*)&Mips::CPU64RegsRegClass :
@@ -144,9 +151,12 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
const GlobalValue *FName = MF.getFunction();
BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
+ TargetRegInfo);
return;
}
@@ -159,6 +169,8 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
.addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
.addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
+ TargetRegInfo);
return;
}
@@ -175,11 +187,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
.addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC,
+ TargetRegInfo);
return;
}
assert(Subtarget.isABI_O32());
+ if (Subtarget.inMips16Mode())
+    return; // No need to load GP; it can be calculated anywhere.
+
// For O32 ABI, the following instruction sequence is emitted to initialize
// the global base register:
//
@@ -201,6 +219,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
MBB.addLiveIn(Mips::V0);
BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
.addReg(Mips::V0).addReg(Mips::T9);
+ MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo);
}
bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
@@ -274,7 +293,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
// If Parent is an unaligned f32 load or store, select a (base + index)
// floating point load/store instruction (luxc1 or suxc1).
- const LSBaseSDNode* LS = 0;
+ const LSBaseSDNode *LS = 0;
if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
EVT VT = LS->getMemoryVT();
@@ -335,17 +354,18 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
- SDValue LoVal = Addr.getOperand(1);
- if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
- isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
+ SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
Base = Addr.getOperand(0);
- Offset = LoVal.getOperand(0);
+ Offset = Opnd0;
return true;
}
}
// If an indexed floating point load/store can be emitted, return false.
- if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
Subtarget.hasMips32r2Or64() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/)
return false;
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 04d4743b35..bc0a616e33 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -304,6 +304,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::ADD);
setMinFunctionAlignment(HasMips64 ? 3 : 2);
@@ -312,6 +313,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
+
+ maxStoresPerMemcpy = 16;
}
bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
@@ -340,17 +343,17 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
// Return true if pattern matching was successful.
-static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) {
// ADDENode's second operand must be a flag output of an ADDC node in order
// for the matching to be successful.
- SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+ SDNode *ADDCNode = ADDENode->getOperand(2).getNode();
if (ADDCNode->getOpcode() != ISD::ADDC)
return false;
SDValue MultHi = ADDENode->getOperand(0);
SDValue MultLo = ADDCNode->getOperand(0);
- SDNode* MultNode = MultHi.getNode();
+ SDNode *MultNode = MultHi.getNode();
unsigned MultOpc = MultHi.getOpcode();
// MultHi and MultLo must be generated by the same node,
@@ -413,17 +416,17 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
// Return true if pattern matching was successful.
-static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) {
// SUBENode's second operand must be a flag output of an SUBC node in order
// for the matching to be successful.
- SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+ SDNode *SUBCNode = SUBENode->getOperand(2).getNode();
if (SUBCNode->getOpcode() != ISD::SUBC)
return false;
SDValue MultHi = SUBENode->getOperand(1);
SDValue MultLo = SUBCNode->getOperand(1);
- SDNode* MultNode = MultHi.getNode();
+ SDNode *MultNode = MultHi.getNode();
unsigned MultOpc = MultHi.getOpcode();
// MultHi and MultLo must be generated by the same node,
@@ -478,9 +481,9 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
return true;
}
-static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
@@ -491,9 +494,9 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
}
-static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalize())
return SDValue();
@@ -504,9 +507,9 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
}
-static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -581,7 +584,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
// Creates and returns an FPCmp node from a setcc node.
// Returns Op if setcc is not a floating point comparison.
-static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) {
// must be a SETCC node
if (Op.getOpcode() != ISD::SETCC)
return Op;
@@ -603,7 +606,7 @@ static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
}
// Creates and returns a CMovFPT/F node.
-static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True,
SDValue False, DebugLoc DL) {
bool invert = InvertFPCondCode((Mips::CondCode)
cast<ConstantSDNode>(Cond.getOperand(2))
@@ -613,9 +616,9 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -639,16 +642,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
const DebugLoc DL = N->getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
SDValue True = N->getOperand(1);
-
+
SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
-
+
return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
-static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
// Pattern match EXT.
// $dst = and ((sra or srl) $src , pos), (2**size - 1)
// => ext $dst, $src, size, pos
@@ -686,9 +689,9 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
DAG.getConstant(SMSize, MVT::i32));
}
-static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
+static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+ const MipsSubtarget *Subtarget) {
// Pattern match INS.
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
// where mask1 = (2**size - 1) << pos, mask0 = ~mask1
@@ -740,6 +743,33 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
+static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt))
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Add = N->getOperand(1);
+
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue Lo = Add.getOperand(1);
+
+ if ((Lo.getOpcode() != MipsISD::Lo) ||
+ (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable))
+ return SDValue();
+
+ EVT ValTy = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0),
+ Add.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo);
+}
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -755,11 +785,13 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case ISD::UDIVREM:
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
- return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return PerformANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return PerformORCombine(N, DAG, DCI, Subtarget);
+ case ISD::ADD:
+ return PerformADDCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -832,7 +864,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
/*
static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
DebugLoc dl,
- const MipsSubtarget* Subtarget,
+ const MipsSubtarget *Subtarget,
const TargetInstrInfo *TII,
bool isFPCmp, unsigned Opc) {
// There is no need to expand CMov instructions if target has
@@ -2053,7 +2085,7 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// TODO: set SType according to the desired memory barrier behavior.
SDValue
-MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const {
+MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
unsigned SType = 0;
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
@@ -2061,7 +2093,7 @@ MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const {
}
SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
// FIXME: Need pseudo-fence for 'singlethread' fences
// FIXME: Set SType for weaker fences where supported/appropriate.
unsigned SType = 0;
@@ -2071,7 +2103,7 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
}
SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
- SelectionDAG& DAG) const {
+ SelectionDAG &DAG) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
@@ -2093,15 +2125,15 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
DAG.getConstant(0x20, MVT::i32));
- Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, DAG.getConstant(0, MVT::i32),
- ShiftLeftLo);
+ Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond,
+ DAG.getConstant(0, MVT::i32), ShiftLeftLo);
Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
bool IsSRA) const {
DebugLoc DL = Op.getDebugLoc();
SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
@@ -2144,17 +2176,15 @@ SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
SDValue Chain, SDValue Src, unsigned Offset) {
- SDValue BasePtr = LD->getBasePtr(), Ptr;
+ SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT();
- EVT BasePtrVT = BasePtr.getValueType();
+ EVT BasePtrVT = Ptr.getValueType();
DebugLoc DL = LD->getDebugLoc();
SDVTList VTList = DAG.getVTList(VT, MVT::Other);
if (Offset)
- Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, BasePtr,
+ Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
DAG.getConstant(Offset, BasePtrVT));
- else
- Ptr = BasePtr;
SDValue Ops[] = { Chain, Ptr, Src };
return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
@@ -2225,17 +2255,14 @@ SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
SDValue Chain, unsigned Offset) {
- SDValue BasePtr = SD->getBasePtr(), Ptr, Value = SD->getValue();
- EVT MemVT = SD->getMemoryVT();
- EVT BasePtrVT = BasePtr.getValueType();
+ SDValue Ptr = SD->getBasePtr(), Value = SD->getValue();
+ EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType();
DebugLoc DL = SD->getDebugLoc();
SDVTList VTList = DAG.getVTList(MVT::Other);
if (Offset)
- Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, BasePtr,
+ Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr,
DAG.getConstant(Offset, BasePtrVT));
- else
- Ptr = BasePtr;
SDValue Ops[] = { Chain, Value, Ptr };
return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
@@ -2472,10 +2499,10 @@ static unsigned getNextIntArgReg(unsigned Reg) {
// Write ByVal Arg to arg registers and stack.
static void
WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
- SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
- SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ SmallVector<SDValue, 8> &MemOpChains, int &LastFI,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
MVT PtrType, bool isLittle) {
unsigned LocMemOffset = VA.getLocMemOffset();
unsigned Offset = 0;
@@ -2563,10 +2590,10 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
// Copy Mips64 byVal arg to registers and stack.
static void
PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
- SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
- SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass,
+ SmallVector<SDValue, 8> &MemOpChains, int &LastFI,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
EVT PtrTy, bool isLittle) {
unsigned ByValSize = Flags.getByValSize();
unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8);
@@ -2679,7 +2706,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- if (IsO32)
+ if (CallConv == CallingConv::Fast)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC);
+ else if (IsO32)
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
else if (HasMips64)
AnalyzeMips64CallOperands(CCInfo, Outs);
@@ -2704,7 +2733,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
// allocated.
- if (IsO32)
+ if (IsO32 && (CallConv != CallingConv::Fast))
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
@@ -2958,7 +2987,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_Mips);
@@ -2977,9 +3006,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
- std::vector<SDValue>& OutChains,
+ std::vector<SDValue> &OutChains,
SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
const Argument *FuncArg) {
unsigned LocMem = VA.getLocMemOffset();
unsigned FirstWord = LocMem / 4;
@@ -3004,8 +3033,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
// Create frame object on stack and copy registers used for byval passing to it.
static unsigned
CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
- std::vector<SDValue>& OutChains, SelectionDAG &DAG,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ std::vector<SDValue> &OutChains, SelectionDAG &DAG,
+ const CCValAssign &VA, const ISD::ArgFlagsTy &Flags,
MachineFrameInfo *MFI, bool IsRegLoc,
SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
EVT PtrTy, const Argument *FuncArg) {
@@ -3064,7 +3093,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- if (IsO32)
+ if (CallConv == CallingConv::Fast)
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC);
+ else if (IsO32)
CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
@@ -3250,7 +3281,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ getTargetMachine(), RVLocs, *DAG.getContext());
  // Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
@@ -3398,6 +3429,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
case 'r':
if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8)
return std::make_pair(0U, &Mips::CPURegsRegClass);
+ if (VT == MVT::i64 && !HasMips64)
+ return std::make_pair(0U, &Mips::CPURegsRegClass);
if (VT == MVT::i64 && HasMips64)
return std::make_pair(0U, &Mips::CPU64RegsRegClass);
// This will generate an error message
@@ -3530,6 +3563,16 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
+EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (Subtarget->hasMips64())
+ return MVT::i64;
+
+ return MVT::i32;
+}
+
bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (VT != MVT::f32 && VT != MVT::f64)
return false;
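Two of the changes in this file interact: maxStoresPerMemcpy = 16 raises the store budget, and getOptimalMemOpType picks i64 chunks when the subtarget has Mips64 (i32 otherwise). Assuming one optimal-width store per chunk (a simplification; the expander may use narrower operations at the tail), the inline-expansion bound works out as:

  // Back-of-the-envelope bound on inline memcpy expansion (sketch).
  constexpr unsigned MaxStores = 16;              // maxStoresPerMemcpy
  constexpr unsigned Chunk64 = 8, Chunk32 = 4;    // bytes per i64 / i32 store
  static_assert(MaxStores * Chunk64 == 128, "Mips64 inline-copy bound");
  static_assert(MaxStores * Chunk32 == 64, "Mips32 inline-copy bound");

Copies above the bound still go through the memcpy libcall.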
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 5342e37f28..b9975c550b 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -146,7 +146,8 @@ namespace llvm {
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
+ bool IsSRA) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
@@ -202,6 +203,11 @@ namespace llvm {
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 29bd2dc494..c757b4c33f 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -54,10 +54,14 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in
// Feature predicates.
//===----------------------------------------------------------------------===//
-def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">;
-def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">;
-def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">;
-def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">;
+def IsFP64bit : Predicate<"Subtarget.isFP64bit()">,
+ AssemblerPredicate<"FeatureFP64Bit">;
+def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">,
+ AssemblerPredicate<"!FeatureFP64Bit">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
+ AssemblerPredicate<"FeatureSingleFloat">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
+ AssemblerPredicate<"!FeatureSingleFloat">;
// FP immediate patterns.
def fpimm0 : PatLeaf<(fpimm), [{
@@ -428,46 +432,52 @@ def ExtractElementF64 :
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
-def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
+def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>;
+def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
-def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
-def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
+def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
+def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
let Predicates = [NotFP64bit, HasStandardEncoding] in {
- def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
- def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
- def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
- def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
+ def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
+ (CVT_D32_W (MTC1 CPURegs:$src))>;
+ def : MipsPat<(i32 (fp_to_sint AFGR64:$src)),
+ (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
+ def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
+ def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
let Predicates = [IsFP64bit, HasStandardEncoding] in {
- def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>;
- def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
+ def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
+ def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
- def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>;
- def : Pat<(f32 (sint_to_fp CPU64Regs:$src)),
- (CVT_S_L (DMTC1 CPU64Regs:$src))>;
- def : Pat<(f64 (sint_to_fp CPU64Regs:$src)),
- (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
+ def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
+ (CVT_D64_W (MTC1 CPURegs:$src))>;
+ def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)),
+ (CVT_S_L (DMTC1 CPU64Regs:$src))>;
+ def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)),
+ (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
- def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>;
- def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
- def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
+ def : MipsPat<(i32 (fp_to_sint FGR64:$src)),
+ (MFC1 (TRUNC_W_D64 FGR64:$src))>;
+ def : MipsPat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
+ def : MipsPat<(i64 (fp_to_sint FGR64:$src)),
+ (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
- def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
- def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
+ def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
+ def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
}
// Patterns for unaligned floating point loads and stores.
let Predicates = [HasMips32r2Or64, NotN64, NotNaCl/*@LOCALMOD*/] in {
- def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
- def : Pat<(store_u FGR32:$src, CPURegs:$addr),
- (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
+ def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
+ def : MipsPat<(store_u FGR32:$src, CPURegs:$addr),
+ (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
}
let Predicates = [IsN64, NotNaCl/*@LOCALMOD*/] in {
- def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
- def : Pat<(store_u FGR32:$src, CPU64Regs:$addr),
- (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
+ def : MipsPat<(f32 (load_u CPU64Regs:$addr)),
+ (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
+ def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr),
+ (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index c01830d509..e4eefb9905 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
@@ -329,9 +330,9 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
}
}
-static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
+static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
MachineBasicBlock *&BB,
- SmallVectorImpl<MachineOperand>& Cond) {
+ SmallVectorImpl<MachineOperand> &Cond) {
assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
int NumOp = Inst->getNumExplicitOperands();
@@ -505,3 +506,58 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
return false;
}
+/// Return the number of bytes of code the specified instruction may occupy.
+unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MI->getDesc().getSize();
+ case TargetOpcode::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+ }
+}
+
+unsigned
+llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
+                      MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL,
+ bool LastInstrIsADDiu,
+ MipsAnalyzeImmediate::Inst *LastInst) {
+ MipsAnalyzeImmediate AnalyzeImm;
+ unsigned Size = IsN64 ? 64 : 32;
+ unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
+ unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ if (LastInst && (Seq.size() == 1)) {
+ *LastInst = *Inst;
+ return 0;
+ }
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+  // Build the remaining instructions in Seq. Skip the last instruction if
+  // LastInst is not null.
+ for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst)
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ if (LastInst)
+ *LastInst = *Inst;
+
+ return Seq.size() - !!LastInst;
+}
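A worked example of the decomposition loadImmediate relies on (0x12345678 is an arbitrary value; the sketch reproduces only the common LUi/ORi split, not the full MipsAnalyzeImmediate machinery): lui writes the upper 16 bits and ori fills in the lower 16, which is exact because ori zero-extends its immediate.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Imm = 0x12345678;              // illustrative immediate
    uint16_t Hi = uint16_t(Imm >> 16);      // lui operand: 0x1234
    uint16_t Lo = uint16_t(Imm & 0xffff);   // ori operand: 0x5678
    // With LastInst == 0, both instructions are emitted and 2 is returned;
    // with a non-null LastInst, the ori is withheld and handed back as an
    // opcode/immediate pair for the caller to fold (e.g. into a load offset).
    std::printf("lui $at, 0x%x\nori $at, $at, 0x%x\n", Hi, Lo);
  }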
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 51cc9afdfa..7a0065b634 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,6 +15,7 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -24,12 +25,6 @@
namespace llvm {
-namespace Mips {
- /// GetOppositeBranchOpc - Return the inverse of the specified
- /// opcode, e.g. turning BEQ to BNE.
- unsigned GetOppositeBranchOpc(unsigned Opc);
-}
-
class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
bool IsN64;
@@ -109,8 +104,27 @@ public:
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
+
+  /// Return the number of bytes of code the specified instruction may occupy.
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
};
+namespace Mips {
+ /// GetOppositeBranchOpc - Return the inverse of the specified
+ /// opcode, e.g. turning BEQ to BNE.
+ unsigned GetOppositeBranchOpc(unsigned Opc);
+
+  /// Emit a series of instructions to load an immediate. If LastInst is not
+  /// null, the last instruction of the sequence is not emitted; its opcode
+  /// and immediate operand are returned in LastInst instead. The function
+  /// returns the number of MachineInstrs emitted.
+ unsigned
+ loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII,
+                MachineBasicBlock &MBB, MachineBasicBlock::iterator II,
+ DebugLoc DL, bool LastInstrIsADDiu,
+ MipsAnalyzeImmediate::Inst *LastInst);
+}
+
}
#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 60343293e8..5e388281b5 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -174,6 +174,10 @@ def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">,
def IsNaCl : Predicate<"Subtarget.isTargetNaCl()">;
def NotNaCl : Predicate<"!Subtarget.isTargetNaCl()">;
+class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
+ let Predicates = [HasStandardEncoding];
+}
+
//===----------------------------------------------------------------------===//
// Instruction format superclass
//===----------------------------------------------------------------------===//
@@ -218,6 +222,7 @@ def mem : Operand<i32> {
def mem64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
+ let EncoderMethod = "getMemEncoding";
}
def mem_ea : Operand<i32> {
@@ -563,6 +568,7 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
+ let Defs = [AT];
}
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
@@ -574,6 +580,7 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
+ let Defs = [AT];
}
// SetCC
@@ -603,6 +610,7 @@ class JumpFJ<bits<6> op, string instr_asm>:
let hasDelaySlot = 1;
let Predicates = [RelocStatic, HasStandardEncoding];
let DecoderMethod = "DecodeJumpTarget";
+ let Defs = [AT];
}
// Unconditional branch
@@ -616,6 +624,7 @@ class UncondBranch<bits<6> op, string instr_asm>:
let isBarrier = 1;
let hasDelaySlot = 1;
let Predicates = [RelocPIC, HasStandardEncoding];
+ let Defs = [AT];
}
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
@@ -1087,67 +1096,67 @@ def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Small immediates
-def : Pat<(i32 immSExt16:$in),
- (ADDiu ZERO, imm:$in)>;
-def : Pat<(i32 immZExt16:$in),
- (ORi ZERO, imm:$in)>;
-def : Pat<(i32 immLow16Zero:$in),
- (LUi (HI16 imm:$in))>;
+def : MipsPat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : MipsPat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+def : MipsPat<(i32 immLow16Zero:$in),
+ (LUi (HI16 imm:$in))>;
// Arbitrary immediates
-def : Pat<(i32 imm:$imm),
+def : MipsPat<(i32 imm:$imm),
(ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
-// Carry patterns
-def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
- (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
-def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
- (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : Pat<(addc CPURegs:$src, immSExt16:$imm),
- (ADDiu CPURegs:$src, imm:$imm)>;
+// Carry patterns
+def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : MipsPat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
// Call
-def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
- (JAL tglobaladdr:$dst)>;
-def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
- (JAL texternalsym:$dst)>;
-//def : Pat<(MipsJmpLink CPURegs:$dst),
-// (JALR CPURegs:$dst)>;
+def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+//def : MipsPat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
// hi/lo relocs
-def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
-def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
-def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-
-def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
-def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
-def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-
-def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
- (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
- (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
- (ADDiu CPURegs:$hi, tjumptable:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
- (ADDiu CPURegs:$hi, tconstpool:$lo)>;
-def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+
+def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
-def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
- (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
-def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
- (ADDiu CPURegs:$gp, tconstpool:$in)>;
+def : MipsPat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : MipsPat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
// wrapper_pic
class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
- Pat<(MipsWrapper RC:$gp, node:$in),
- (ADDiuOp RC:$gp, node:$in)>;
+ MipsPat<(MipsWrapper RC:$gp, node:$in),
+ (ADDiuOp RC:$gp, node:$in)>;
def : WrapperPat<tglobaladdr, ADDiu, CPURegs>;
def : WrapperPat<tconstpool, ADDiu, CPURegs>;
@@ -1157,58 +1166,58 @@ def : WrapperPat<tjumptable, ADDiu, CPURegs>;
def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have "not", so we expand our way
-def : Pat<(not CPURegs:$in),
- (NOR CPURegs:$in, ZERO)>;
+def : MipsPat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
// extended loads
let Predicates = [NotN64, HasStandardEncoding] in {
- def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
- def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
- def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>;
- def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>;
+ def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
+ def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>;
}
let Predicates = [IsN64, HasStandardEncoding] in {
- def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
- def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
- def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>;
- def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>;
+ def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>;
}
// peepholes
let Predicates = [NotN64, HasStandardEncoding] in {
- def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
- def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>;
+ def : MipsPat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+ def : MipsPat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>;
}
let Predicates = [IsN64, HasStandardEncoding] in {
- def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
- def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>;
+ def : MipsPat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
+ def : MipsPat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>;
}
// brcond patterns
multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
Instruction SLTiuOp, Register ZEROReg> {
-def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
- (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
-def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
- (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
+ (BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
+ (BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
-def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
- (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
- (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
+ (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
-def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
+ (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
-def : Pat<(brcond RC:$cond, bb:$dst),
- (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
+def : MipsPat<(brcond RC:$cond, bb:$dst),
+ (BNEOp RC:$cond, ZEROReg, bb:$dst)>;
}
defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
@@ -1216,39 +1225,39 @@ defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
// setcc patterns
multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
Instruction SLTuOp, Register ZEROReg> {
- def : Pat<(seteq RC:$lhs, RC:$rhs),
- (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
- def : Pat<(setne RC:$lhs, RC:$rhs),
- (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+ def : MipsPat<(seteq RC:$lhs, RC:$rhs),
+ (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setne RC:$lhs, RC:$rhs),
+ (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
}
multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
- def : Pat<(setle RC:$lhs, RC:$rhs),
- (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
- def : Pat<(setule RC:$lhs, RC:$rhs),
- (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+ def : MipsPat<(setle RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ def : MipsPat<(setule RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
}
multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
- def : Pat<(setgt RC:$lhs, RC:$rhs),
- (SLTOp RC:$rhs, RC:$lhs)>;
- def : Pat<(setugt RC:$lhs, RC:$rhs),
- (SLTuOp RC:$rhs, RC:$lhs)>;
+ def : MipsPat<(setgt RC:$lhs, RC:$rhs),
+ (SLTOp RC:$rhs, RC:$lhs)>;
+ def : MipsPat<(setugt RC:$lhs, RC:$rhs),
+ (SLTuOp RC:$rhs, RC:$lhs)>;
}
multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
- def : Pat<(setge RC:$lhs, RC:$rhs),
- (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
- def : Pat<(setuge RC:$lhs, RC:$rhs),
- (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setge RC:$lhs, RC:$rhs),
+ (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ def : MipsPat<(setuge RC:$lhs, RC:$rhs),
+ (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
}
multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
Instruction SLTiuOp> {
- def : Pat<(setge RC:$lhs, immSExt16:$rhs),
- (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
- def : Pat<(setuge RC:$lhs, immSExt16:$rhs),
- (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : MipsPat<(setge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs),
+ (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
}
defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>;
@@ -1258,10 +1267,10 @@ defm : SetgePats<CPURegs, SLT, SLTu>;
defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
// select MipsDynAlloc
-def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
// bswap pattern
-def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
//===----------------------------------------------------------------------===//
// Floating Point Support
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index 76ca3e1767..150bdbbe6f 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -154,8 +154,8 @@ TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() {
return Result;
}
-void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
+void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) {
JCE.emitAlignment(4);
void *Addr = (void*) (JCE.getCurrentPCValue());
if (!sys::Memory::setRangeWritable(Addr, 16))
@@ -193,7 +193,7 @@ void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
+ unsigned NumRelocs, unsigned char *GOTBase) {
for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
index f4c4ae86d3..637a318660 100644
--- a/lib/Target/Mips/MipsJITInfo.h
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -45,8 +45,8 @@ class MipsJITInfo : public TargetJITInfo {
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
- virtual void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE);
+ virtual void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE);
/// getLazyResolverFunction - Expose the lazy resolver to the JIT.
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
@@ -55,7 +55,7 @@ class MipsJITInfo : public TargetJITInfo {
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase);
+ unsigned NumRelocs, unsigned char *GOTBase);
/// Initialize - Initialize internal stage for the function being JITted.
void Initialize(const MachineFunction &MF, bool isPIC) {
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
new file mode 100644
index 0000000000..7be353f190
--- /dev/null
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -0,0 +1,418 @@
+//===-- MipsLongBranch.cpp - Emit long branches ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands a branch or jump instruction into a long branch if its
+// offset is too large to fit into its immediate field.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-long-branch"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+static cl::opt<bool> SkipLongBranch(
+ "skip-mips-long-branch",
+ cl::init(false),
+ cl::desc("MIPS: Skip long branch pass."),
+ cl::Hidden);
+
+static cl::opt<bool> ForceLongBranch(
+ "force-mips-long-branch",
+ cl::init(false),
+ cl::desc("MIPS: Expand all branches to long format."),
+ cl::Hidden);
+
+namespace {
+ typedef MachineBasicBlock::iterator Iter;
+ typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+ struct MBBInfo {
+ uint64_t Size;
+ bool HasLongBranch;
+ MachineInstr *Br;
+
+ MBBInfo() : Size(0), HasLongBranch(false), Br(0) {}
+ };
+
+ class MipsLongBranch : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ MipsLongBranch(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm),
+ TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {}
+
+ virtual const char *getPassName() const {
+ return "Mips Long Branch";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ void splitMBB(MachineBasicBlock *MBB);
+ void initMBBInfo();
+ int64_t computeOffset(const MachineInstr *Br);
+ bool offsetFitsIntoField(const MachineInstr *Br);
+ unsigned addLongBranch(MachineBasicBlock &MBB, Iter Pos,
+ MachineBasicBlock *Tgt, DebugLoc DL, bool Nop);
+ void replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL,
+ MachineBasicBlock *MBBOpnd);
+ void expandToLongBranch(MBBInfo &Info);
+
+ const TargetMachine &TM;
+ const MipsInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<MBBInfo, 16> MBBInfos;
+ };
+
+ char MipsLongBranch::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
+ return new MipsLongBranch(tm);
+}
+
+/// Iterate over list of Br's operands and search for a MachineBasicBlock
+/// operand.
+static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) {
+ for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = Br.getOperand(I);
+
+ if (MO.isMBB())
+ return MO.getMBB();
+ }
+
+ assert(false && "This instruction does not have an MBB operand.");
+ return 0;
+}
+
+// Traverse the list of instructions backwards until a non-debug instruction is
+// found or it reaches E.
+static ReverseIter getNonDebugInstr(ReverseIter B, ReverseIter E) {
+ for (; B != E; ++B)
+ if (!B->isDebugValue())
+ return B;
+
+ return E;
+}
+
+// Split MBB if it has two direct jumps/branches.
+void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
+ ReverseIter End = MBB->rend();
+ ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End);
+
+ // Return if MBB has no branch instructions.
+ if ((LastBr == End) ||
+ (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch()))
+ return;
+
+ ReverseIter FirstBr = getNonDebugInstr(llvm::next(LastBr), End);
+
+ // MBB has only one branch instruction if FirstBr is not a branch
+ // instruction.
+ if ((FirstBr == End) ||
+ (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch()))
+ return;
+
+ assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found.");
+
+ // Create a new MBB. Move instructions in MBB to the newly created MBB.
+ MachineBasicBlock *NewMBB =
+ MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+
+ // Insert NewMBB and fix control flow.
+ MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
+ NewMBB->transferSuccessors(MBB);
+ NewMBB->removeSuccessor(Tgt);
+ MBB->addSuccessor(NewMBB);
+ MBB->addSuccessor(Tgt);
+ MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB);
+
+ NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end());
+}
+
+// Fill MBBInfos.
+void MipsLongBranch::initMBBInfo() {
+ // Split the MBBs if they have two branches. Each basic block should have at
+ // most one branch after this loop is executed.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;)
+ splitMBB(I++);
+
+ MF->RenumberBlocks();
+ MBBInfos.clear();
+ MBBInfos.resize(MF->size());
+
+ for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I);
+
+ // Compute size of MBB.
+ for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
+ MI != MBB->instr_end(); ++MI)
+ MBBInfos[I].Size += TII->GetInstSizeInBytes(&*MI);
+
+ // Search for MBB's branch instruction.
+ ReverseIter End = MBB->rend();
+ ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End);
+
+ if ((Br != End) && !Br->isIndirectBranch() &&
+ (Br->isConditionalBranch() || Br->isUnconditionalBranch()))
+ MBBInfos[I].Br = (++Br).base();
+ }
+}
+
+// Compute the offset of a branch, in bytes.
+int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) {
+ int64_t Offset = 0;
+ int ThisMBB = Br->getParent()->getNumber();
+ int TargetMBB = getTargetMBB(*Br)->getNumber();
+
+ // Compute offset of a forward branch.
+ if (ThisMBB < TargetMBB) {
+ for (int N = ThisMBB + 1; N < TargetMBB; ++N)
+ Offset += MBBInfos[N].Size;
+
+ return Offset + 4;
+ }
+
+ // Compute offset of a backward branch.
+ for (int N = ThisMBB; N >= TargetMBB; --N)
+ Offset += MBBInfos[N].Size;
+
+ return -Offset + 4;
+}
+
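A worked example of the computation above, with illustrative block sizes:

    // Forward branch in BB0 targeting BB3, with BB1 (8 bytes) and
    // BB2 (12 bytes) between them:
    //   Offset = 8 + 12 + 4 = 24
    // Backward branch in BB3 (16 bytes) targeting BB1 (8 bytes),
    // with BB2 (12 bytes) between them:
    //   Offset = -(16 + 12 + 8) + 4 = -32

The caller divides the result by 4 before its isInt<16>/isInt<26> checks,
since MIPS branch and jump fields count 4-byte instruction words.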
+// Insert the following sequence:
+// (pic or N64)
+// lw $at, global_reg_slot
+// lw $at, %got($L1)($at)
+// addiu $at, $at, %lo($L1)
+// jr $at
+// nop
+// (static and !N64)
+// lui $at, %hi($L1)
+// addiu $at, $at, %lo($L1)
+// jr $at
+// nop
+unsigned MipsLongBranch::addLongBranch(MachineBasicBlock &MBB, Iter Pos,
+ MachineBasicBlock *Tgt, DebugLoc DL,
+ bool Nop) {
+ MF->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI();
+ bool N64 = (ABI == MipsSubtarget::N64);
+ unsigned NumInstrs;
+
+ if (IsPIC || N64) {
+ bool HasMips64 = TM.getSubtarget<MipsSubtarget>().hasMips64();
+ unsigned AT = N64 ? Mips::AT_64 : Mips::AT;
+ unsigned Load = N64 ? Mips::LD_P8 : Mips::LW;
+ unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu;
+ unsigned JR = N64 ? Mips::JR64 : Mips::JR;
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ const MipsRegisterInfo *MRI =
+ static_cast<const MipsRegisterInfo*>(TM.getRegisterInfo());
+ unsigned SP = MRI->getFrameRegister(*MF);
+ unsigned GlobalRegFI = MF->getInfo<MipsFunctionInfo>()->getGlobalRegFI();
+ int64_t Offset = MF->getFrameInfo()->getObjectOffset(GlobalRegFI);
+
+ if (isInt<16>(Offset)) {
+ BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(SP).addImm(Offset);
+ NumInstrs = 1;
+ } else {
+ unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu;
+ MipsAnalyzeImmediate::Inst LastInst(0, 0);
+
+ MF->getInfo<MipsFunctionInfo>()->setEmitNOAT();
+ NumInstrs = Mips::loadImmediate(Offset, N64, *TII, MBB, Pos, DL, true,
+ &LastInst) + 2;
+ BuildMI(MBB, Pos, DL, TII->get(ADDu), AT).addReg(SP).addReg(AT);
+ BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(AT)
+ .addImm(SignExtend64<16>(LastInst.ImmOpnd));
+ }
+
+ BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(AT).addMBB(Tgt, GOTFlag);
+ BuildMI(MBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT).addMBB(Tgt, OFSTFlag);
+ BuildMI(MBB, Pos, DL, TII->get(JR)).addReg(AT, RegState::Kill);
+ NumInstrs += 3;
+ } else {
+ BuildMI(MBB, Pos, DL, TII->get(Mips::LUi), Mips::AT)
+ .addMBB(Tgt, MipsII::MO_ABS_HI);
+ BuildMI(MBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT)
+ .addReg(Mips::AT).addMBB(Tgt, MipsII::MO_ABS_LO);
+ BuildMI(MBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT, RegState::Kill);
+ NumInstrs = 3;
+ }
+
+ if (Nop) {
+ BuildMI(MBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
+ ++NumInstrs;
+ }
+
+ return NumInstrs;
+}
+
+// Replace Br with a branch which has the opposite condition code and a
+// MachineBasicBlock operand MBBOpnd.
+void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
+ DebugLoc DL, MachineBasicBlock *MBBOpnd) {
+ unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode());
+ const MCInstrDesc &NewDesc = TII->get(NewOpc);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc);
+
+ for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = Br->getOperand(I);
+
+ if (!MO.isReg()) {
+ assert(MO.isMBB() && "MBB operand expected.");
+ break;
+ }
+
+ MIB.addReg(MO.getReg());
+ }
+
+ MIB.addMBB(MBBOpnd);
+
+ Br->eraseFromParent();
+}
+
+// Expand branch instructions to long branches.
+void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
+ I.HasLongBranch = true;
+
+ MachineBasicBlock *MBB = I.Br->getParent(), *Tgt = getTargetMBB(*I.Br);
+ DebugLoc DL = I.Br->getDebugLoc();
+
+ if (I.Br->isUnconditionalBranch()) {
+ // Unconditional branch before transformation:
+ // b $tgt
+ // delay-slot-instr
+ //
+ // after transformation:
+ // delay-slot-instr
+ // lw $at, global_reg_slot
+ // lw $at, %got($tgt)($at)
+ // addiu $at, $at, %lo($tgt)
+ // jr $at
+ // nop
+ I.Size += (addLongBranch(*MBB, llvm::next(Iter(I.Br)), Tgt, DL, true)
+ - 1) * 4;
+
+ // Remove branch and clear InsideBundle bit of the next instruction.
+ llvm::next(MachineBasicBlock::instr_iterator(I.Br))
+ ->setIsInsideBundle(false);
+ I.Br->eraseFromParent();
+ return;
+ }
+
+ assert(I.Br->isConditionalBranch() && "Conditional branch expected.");
+
+ // Conditional branch before transformation:
+ // b cc, $tgt
+ // delay-slot-instr
+ // FallThrough:
+ //
+ // after transformation:
+ // b !cc, FallThrough
+ // delay-slot-instr
+ // NewMBB:
+ // lw $at, global_reg_slot
+ // lw $at, %got($tgt)($at)
+ // addiu $at, $at, %lo($tgt)
+ // jr $at
+// nop
+ // FallThrough:
+
+ MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB);
+ MBB->removeSuccessor(Tgt);
+ MBB->addSuccessor(NewMBB);
+ NewMBB->addSuccessor(Tgt);
+
+ I.Size += addLongBranch(*NewMBB, NewMBB->begin(), Tgt, DL, true) * 4;
+ replaceBranch(*MBB, I.Br, DL, *MBB->succ_begin());
+}
+
+static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
+ MachineBasicBlock &MBB = F.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL = MBB.findDebugLoc(MBB.begin());
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0)
+ .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ MBB.removeLiveIn(Mips::V0);
+}
+
+bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+ if ((TM.getRelocationModel() == Reloc::PIC_) &&
+ TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
+ F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
+ emitGPDisp(F, TII);
+
+ if (SkipLongBranch)
+ return true;
+
+ MF = &F;
+ initMBBInfo();
+
+ bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ SmallVector<MBBInfo, 16>::iterator I, E = MBBInfos.end();
+ bool EverMadeChange = false, MadeChange = true;
+
+ while (MadeChange) {
+ MadeChange = false;
+
+ for (I = MBBInfos.begin(); I != E; ++I) {
+ // Skip if this MBB doesn't have a branch or the branch has already been
+ // converted to a long branch.
+ if (!I->Br || I->HasLongBranch)
+ continue;
+
+ if (!ForceLongBranch) {
+ int64_t Offset = computeOffset(I->Br);
+
+ // Check if the offset fits into the 16-bit immediate field of branches.
+ if ((I->Br->isConditionalBranch() || IsPIC) && isInt<16>(Offset / 4))
+ continue;
+
+ // Check if the offset fits into the 26-bit immediate field of jumps (J).
+ if (I->Br->isUnconditionalBranch() && !IsPIC && isInt<26>(Offset / 4))
+ continue;
+ }
+
+ expandToLongBranch(*I);
+ ++LongBranches;
+ EverMadeChange = MadeChange = true;
+ }
+ }
+
+ if (EverMadeChange)
+ MF->RenumberBlocks();
+
+ return true;
+}
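The while (MadeChange) loop is a fixpoint: expanding one branch grows its
block, which can push other branches out of range, so the scan repeats until
nothing else needs expansion. A minimal standalone sketch of the reach implied
by the isInt<16>/isInt<26> checks above (the limits follow from the field
widths; this snippet is illustrative, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // 16-bit signed field, counted in 4-byte instructions: +/-128 KiB.
      int64_t BranchReach = (INT64_C(1) << 15) * 4;
      // 26-bit signed field, same units: +/-128 MiB.
      int64_t JumpReach = (INT64_C(1) << 25) * 4;
      std::printf("branch reach: +/-%lld bytes\n", (long long)BranchReach);
      std::printf("jump reach:   +/-%lld bytes\n", (long long)JumpReach);
      return 0;
    }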
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 0475777eac..ac33619c34 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
: AsmPrinter(asmprinter) {}
-void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) {
+void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) {
Mang = M;
Ctx = C;
}
@@ -105,21 +105,23 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
assert(Offset > 0);
const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
- const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
- return MCOperand::CreateExpr(AddExpr);
+ const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(Add);
}
-static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0,
- const MCOperand& Opnd1,
- const MCOperand& Opnd2 = MCOperand()) {
+/*
+static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand &Opnd0,
+ const MCOperand &Opnd1,
+ const MCOperand &Opnd2 = MCOperand()) {
Inst.setOpcode(Opc);
Inst.addOperand(Opnd0);
Inst.addOperand(Opnd1);
if (Opnd2.isValid())
Inst.addOperand(Opnd2);
}
+*/
-MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
unsigned offset) const {
MachineOperandType MOTy = MO.getType();
@@ -157,11 +159,6 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-// Create the following two instructions:
-// "lui $2, %hi(_gp_disp)"
-// "addiu $2, $2, %lo(_gp_disp)"
-void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) {
- MCOperand RegOpnd = MCOperand::CreateReg(Mips::V0);
MCInst Instr4, Mask1, Mask2; // @LOCALMOD
// @LOCALMOD-START
MCOperand MaskReg = MCOperand::CreateReg(Mips::LoadStoreStackMaskReg);
@@ -205,18 +202,4 @@ void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) {
llvm_unreachable("unaligned instruction not sandboxed");
}
}
- // @LOCALMOD-END
- StringRef SymName("_gp_disp");
- const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
- const MCSymbolRefExpr *MCSym;
-
- MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
- MCOperand SymHi = MCOperand::CreateExpr(MCSym);
- MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
- MCOperand SymLo = MCOperand::CreateExpr(MCSym);
-
- MCInsts.resize(2);
-
- CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi);
- CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo);
-}
+ // @LOCALMOD-END
\ No newline at end of file
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 0f4944e423..314420a170 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -31,9 +31,8 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
MipsAsmPrinter &AsmPrinter;
public:
MipsMCInstLower(MipsAsmPrinter &asmprinter);
- void Initialize(Mangler *mang, MCContext* C);
+ void Initialize(Mangler *mang, MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- void LowerSETGP01(SmallVector<MCInst, 4>& MCInsts);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 04cb3f22b2..b2232c6573 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,8 +14,11 @@
#ifndef MIPS_MACHINE_FUNCTION_INFO_H
#define MIPS_MACHINE_FUNCTION_INFO_H
+#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include <utility>
namespace llvm {
@@ -45,6 +48,7 @@ class MipsFunctionInfo : public MachineFunctionInfo {
// OutArgFIRange: Range of indices of all frame objects created during call to
// LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
+ int GlobalRegFI;
mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
@@ -54,7 +58,7 @@ public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), DynAllocFI(0),
+ OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0),
MaxCallFrameSize(0), EmitNOAT(false)
{}
@@ -73,6 +77,24 @@ public:
OutArgFIRange.second = LastFI;
}
+ bool isGlobalRegFI(int FI) const {
+ return GlobalRegFI && (FI == GlobalRegFI);
+ }
+
+ int getGlobalRegFI() const {
+ return GlobalRegFI;
+ }
+
+ int initGlobalRegFI() {
+ const TargetMachine &TM = MF.getTarget();
+ unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 8 : 4;
+ int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment();
+ uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment);
+
+ GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true);
+ return GlobalRegFI;
+ }
+
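The slot offset is MaxCallFrameSize rounded up to the stack alignment; a
minimal sketch of that arithmetic (roundUp mirrors the assumed
RoundUpToAlignment semantics, and the values are illustrative):

    #include <cstdint>
    #include <cstdio>

    static uint64_t roundUp(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      // A 20-byte outgoing-argument area with 8-byte stack alignment
      // puts the 4- or 8-byte global-register slot at SP + 24.
      std::printf("offset = %llu\n", (unsigned long long)roundUp(20, 8));
      return 0;
    }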
// The first call to this function creates a frame object for dynamically
// allocated stack area.
int getDynAllocFI() const {
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 203cd9031c..3572f7d4d4 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -16,9 +16,11 @@
#include "MipsRegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
#include "MipsSubtarget.h"
#include "MipsMachineFunction.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Type.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -35,7 +37,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/DebugInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
@@ -54,8 +55,7 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
/// Mips Callee Saved Registers
const uint16_t* MipsRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const
-{
+getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
else if (!Subtarget.hasMips64())
@@ -64,12 +64,11 @@ getCalleeSavedRegs(const MachineFunction *MF) const
return CSR_N32_SaveList;
assert(Subtarget.isABI_N64());
- return CSR_N64_SaveList;
+ return CSR_N64_SaveList;
}
const uint32_t*
-MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
-{
+MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
else if (!Subtarget.hasMips64())
@@ -78,7 +77,7 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
return CSR_N32_RegMask;
assert(Subtarget.isABI_N64());
- return CSR_N64_RegMask;
+ return CSR_N64_RegMask;
}
BitVector MipsRegisterInfo::
@@ -212,7 +211,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// incoming argument, callee-saved register location or local variable.
int64_t Offset;
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex))
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
+ MipsFI->isGlobalRegFI(FrameIndex))
Offset = spOffset;
else
Offset = spOffset + (int64_t)stackSize;
@@ -226,37 +226,17 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
if (!MI.isDebugValue() && !isInt<16>(Offset)) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- MipsAnalyzeImmediate AnalyzeImm;
- unsigned Size = Subtarget.isABI_N64() ? 64 : 32;
- unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi;
unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */);
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ MipsAnalyzeImmediate::Inst LastInst(0, 0);
MipsFI->setEmitNOAT();
-
- // The first instruction can be a LUi, which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == LUi)
- BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
- else
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
- // Build the remaining instructions in Seq except for the last one.
- for (++Inst; Inst != Seq.end() - 1; ++Inst)
- BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
- .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
+ Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true,
+ &LastInst);
BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
FrameReg = ATReg;
- Offset = SignExtend64<16>(Inst->ImmOpnd);
+ Offset = SignExtend64<16>(LastInst.ImmOpnd);
}
MI.getOperand(i).ChangeToRegister(FrameReg, false);
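A standalone sketch of how a too-large offset is split here: loadImmediate
materializes everything but the final 16-bit chunk into $at (which is then
added to the frame register), and the sign-extended last immediate survives
as the memory displacement. Values are illustrative:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Offset = 0x12348000;              // too big for signed 16 bits
      int16_t Lo = (int16_t)(Offset & 0xFFFF);  // -32768 after sign-extension
      int64_t Hi = Offset - Lo;                 // carried by $at + frame reg
      std::printf("hi = 0x%llx, lo = %d, hi+lo = 0x%llx\n",
                  (unsigned long long)Hi, (int)Lo,
                  (unsigned long long)(Hi + Lo));
      return 0;
    }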
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 6d3f83f506..f320baed64 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,7 +42,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 835ac6d05b..c5d6bf9811 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -61,8 +61,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
bool
MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ RegClassVector &CriticalPathRCs) const {
Mode = TargetSubtargetInfo::ANTIDEP_NONE;
CriticalPathRCs.clear();
CriticalPathRCs.push_back(hasMips64() ?
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 8b67572348..78e80148f2 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -116,7 +116,7 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- PM->add(createMipsISelDag(getMipsTargetMachine()));
+ addPass(createMipsISelDag(getMipsTargetMachine()));
return false;
}
@@ -124,12 +124,18 @@ bool MipsPassConfig::addInstSelector() {
// machine code is emitted. Return true if -print-machineinstrs should
// print out the code after the passes.
bool MipsPassConfig::addPreEmitPass() {
- PM->add(createMipsDelaySlotFillerPass(getMipsTargetMachine()));
+ MipsTargetMachine &TM = getMipsTargetMachine();
+ addPass(createMipsDelaySlotFillerPass(TM));
+
+ // NOTE: long branch has not been implemented for mips16.
+ if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
+ addPass(createMipsLongBranchPass(TM));
+
// @LOCALMOD-START
if (getMipsSubtarget().isTargetNaCl()) {
// This pass does all the heavy sfi lifting.
- PM->add(createMipsNaClRewritePass());
+ addPass(createMipsNaClRewritePass());
}
// @LOCALMOD-END
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 80c00e80f1..5cbf057416 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -69,9 +69,7 @@ namespace llvm {
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE);
-
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
};
/// MipsebTargetMachine - Mips32 big endian target machine.
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index a32a78ac83..7cb16b4dd8 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -27,6 +27,7 @@ set(NVPTXCodeGen_sources
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
+add_dependencies(LLVMNVPTXCodeGen intrinsics_gen)
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 696f459ce2..f2b96163f4 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -12,17 +12,18 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTXAsmPrinter.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXRegisterInfo.h"
-#include "NVPTXAsmPrinter.h"
+#include "NVPTXUtilities.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTXNumRegisters.h"
-#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -36,17 +37,13 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/DerivedTypes.h"
-#include "NVPTXUtilities.h"
#include "llvm/Support/TimeValue.h"
-#include <sstream>
#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/Path.h"
#include "llvm/Assembly/Writer.h"
#include "cl_common_defines.h"
-
-
+#include <sstream>
using namespace llvm;
@@ -1914,7 +1911,9 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'r':
break;
}
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 84c7232236..56b237252d 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Function.h"
+#include "NVPTXLowerAggrCopies.h"
#include "llvm/Constants.h"
-#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "NVPTXLowerAggrCopies.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/LLVMContext.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 826b1dd34b..433f415a87 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -120,11 +120,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool NVPTXPassConfig::addInstSelector() {
- PM->add(createLowerAggrCopies());
- PM->add(createSplitBBatBarPass());
- PM->add(createAllocaHoisting());
- PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
- PM->add(createVectorElementizePass(getNVPTXTargetMachine()));
+ addPass(createLowerAggrCopies());
+ addPass(createSplitBBatBarPass());
+ addPass(createAllocaHoisting());
+ addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+ addPass(createVectorElementizePass(getNVPTXTargetMachine()));
return false;
}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 1d82e5c677..b3f9cace6b 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -48,12 +48,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
// bool DisableVerify, MCContext *&OutCtx);
public:
- //virtual bool addPassesToEmitFile(PassManagerBase &PM,
- // formatted_raw_ostream &Out,
- // CodeGenFileType FileType,
- // CodeGenOpt::Level OptLevel,
- // bool DisableVerify = true) ;
-
NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 7204926526..192d18d664 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -29,6 +29,8 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
+add_dependencies(LLVMPowerPCCodeGen intrinsics_gen)
+
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 61d23ce06a..d175e3e79e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -86,8 +86,33 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O,
const char *Modifier) {
- assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
unsigned Code = MI->getOperand(OpNo).getImm();
+ if (!Modifier) {
+ unsigned CCReg = MI->getOperand(OpNo+1).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: llvm_unreachable("Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+
+ // Print the CR bit number. The Code is ((BI << 5) | BO) for a
+ // BCC, but we must have the positive form here (BO == 12)
+ unsigned BI = Code >> 5;
+ assert((Code & 0xF) == 12 &&
+ "BO in predicate bit must have the positive form");
+
+ unsigned Value = 4*RegNo + BI;
+ O << Value;
+ return;
+ }
+
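A worked example of the computation above, assuming BI is the bit index
within the named CR field:

    Code  = (BI << 5) | BO = (2 << 5) | 12 = 76   // EQ bit, positive form
    CCReg = CR6  ->  RegNo = 6
    Value = 4*RegNo + BI = 4*6 + 2 = 26           // the printed operand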
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
case PPC::PRED_ALWAYS: return; // Don't print anything for always.
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 73fd5342a1..8f1e211c3e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -42,7 +42,7 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier);
+ raw_ostream &O, const char *Modifier = 0);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 96c46451be..b7f1688436 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -50,6 +50,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
+ "Enable the isel instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
@@ -66,8 +68,10 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
-def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>;
-def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
@@ -90,10 +94,11 @@ def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
- FeatureMFOCRF, FeatureFSqrt,
- FeatureSTFIWX, Feature64Bit
- /*, Feature64BitRegs */]>;
+def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureSTFIWX, FeatureISEL,
+ Feature64Bit
+ /*, Feature64BitRegs */]>;
def : Processor<"pwr6", G5Itineraries,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
@@ -101,7 +106,7 @@ def : Processor<"pwr6", G5Itineraries,
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index fb90600211..f76b89c803 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -22,8 +22,8 @@
#include "PPCSubtarget.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
@@ -248,7 +248,9 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'c': // Don't print "$" before a global var name or constant.
break; // PPC never has a prefix.
case 'L': // Write second word of DImode reference.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 5234da71a8..f50f9b5a33 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -32,6 +32,7 @@
#define DEBUG_TYPE "ctrloops"
#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
@@ -82,13 +83,14 @@ namespace {
/// getCanonicalInductionVariable - Check to see if the loop has a canonical
/// induction variable.
/// Should be defined in MachineLoop. Based upon version in class Loop.
- MachineInstr *getCanonicalInductionVariable(MachineLoop *L,
- MachineInstr *&IOp) const;
+ void getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const;
/// getTripCount - Return a loop-invariant LLVM register indicating the
/// number of times the loop will be executed. If the trip-count cannot
/// be determined, this returns null.
- CountValue *getTripCount(MachineLoop *L, bool &WordCmp,
+ CountValue *getTripCount(MachineLoop *L,
SmallVector<MachineInstr *, 2> &OldInsts) const;
/// isInductionOperation - Return true if the instruction matches the
@@ -175,12 +177,12 @@ namespace {
/// isCompareEqualsImm - Returns true if the instruction is a compare equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &WordCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPLWI) {
- WordCmp = true;
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
+ if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
return true;
- } else if (MI->getOpcode() == PPC::CMPDI || MI->getOpcode() == PPC::CMPLDI) {
- WordCmp = false;
+ } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ SignedCmp = false;
return true;
}
@@ -227,26 +229,27 @@ bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
/// the machine.
/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
///
-MachineInstr
-*PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
- MachineInstr *&IOp) const {
+void
+PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
+ SmallVector<MachineInstr *, 4> &IVars,
+ SmallVector<MachineInstr *, 4> &IOps) const {
MachineBasicBlock *TopMBB = L->getTopBlock();
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
assert(PI != TopMBB->pred_end() &&
"Loop must have more than one incoming edge!");
MachineBasicBlock *Backedge = *PI++;
- if (PI == TopMBB->pred_end()) return 0; // dead loop
+ if (PI == TopMBB->pred_end()) return; // dead loop
MachineBasicBlock *Incoming = *PI++;
- if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+ if (PI != TopMBB->pred_end()) return; // multiple backedges?
// make sure there is one incoming and one backedge and determine which
// is which.
if (L->contains(Incoming)) {
if (L->contains(Backedge))
- return 0;
+ return;
std::swap(Incoming, Backedge);
} else if (!L->contains(Backedge))
- return 0;
+ return;
// Loop over all of the PHI nodes, looking for a canonical induction variable:
// - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
@@ -263,13 +266,13 @@ MachineInstr
// Check if the definition is an induction operation.
MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
if (isInductionOperation(DI, DefReg)) {
- IOp = DI;
- return MPhi;
+ IOps.push_back(DI);
+ IVars.push_back(MPhi);
}
}
}
}
- return 0;
+ return;
}
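The shape collected per candidate looks like this (a sketch; the names are
illustrative):

    header:
      %iv      = PHI [ %init, %incoming ], [ %iv.next, %backedge ]  <- IVars[i]
      ...
      %iv.next = ADDI %iv, +/-Imm                                   <- IOps[i]

Returning every candidate instead of only the first lets getTripCount pick
the one whose compare actually defines the condition register used by the
terminating branch.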
/// getTripCount - Return a loop-invariant LLVM value indicating the
@@ -283,66 +286,100 @@ MachineInstr
///
/// Based upon getTripCount in LoopInfo.
///
-CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp,
+CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
SmallVector<MachineInstr *, 2> &OldInsts) const {
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate a CTR loop if the loop has more than one exit.
+ if (LastMBB == 0)
+ return 0;
+
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+ if (LastI->getOpcode() != PPC::BCC)
+ return 0;
+
+ // We need to make sure that this compare is defining the condition
+ // register actually used by the terminating branch.
+
+ unsigned PredReg = LastI->getOperand(1).getReg();
+ DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
+
+ unsigned PredCond = LastI->getOperand(0).getImm();
+ if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
+ return 0;
+
// Check that the loop has an induction variable.
- MachineInstr *IOp;
- MachineInstr *IV_Inst = getCanonicalInductionVariable(L, IOp);
- if (IV_Inst == 0) return 0;
-
- // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
- // if Imm is 0, get the count from the PHI opnd
- // if Imm is -M, than M is the count
- // Otherwise, Imm is the count
- MachineOperand *IV_Opnd;
- const MachineOperand *InitialValue;
- if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
- InitialValue = &IV_Inst->getOperand(1);
- IV_Opnd = &IV_Inst->getOperand(3);
- } else {
- InitialValue = &IV_Inst->getOperand(3);
- IV_Opnd = &IV_Inst->getOperand(1);
- }
+ SmallVector<MachineInstr *, 4> IVars, IOps;
+ getCanonicalInductionVariable(L, IVars, IOps);
+ for (unsigned i = 0; i < IVars.size(); ++i) {
+ MachineInstr *IOp = IOps[i];
+ MachineInstr *IV_Inst = IVars[i];
+
+ // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
+ // if Imm is 0, get the count from the PHI opnd
+ // if Imm is -M, then M is the count
+ // Otherwise, Imm is the count
+ MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
- // Look for the cmp instruction to determine if we
- // can get a useful trip count. The trip count can
- // be either a register or an immediate. The location
- // of the value depends upon the type (reg or imm).
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
- MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, WordCmp)) {
- OldInsts.push_back(MI);
- OldInsts.push_back(IOp);
-
- const MachineOperand &MO = MI->getOperand(2);
- assert(MO.isImm() && "IV Cmp Operand should be an immediate");
- int64_t ImmVal = MO.getImm();
-
- const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
- assert(L->contains(IV_DefInstr->getParent()) &&
- "IV definition should occurs in loop");
- int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
-
- if (ImmVal == 0) {
- // Make sure the induction variable changes by one on each iteration.
- if (iv_value != 1 && iv_value != -1) {
- return 0;
- }
- return new CountValue(InitialValue->getReg(), iv_value > 0);
- } else {
+ DEBUG(dbgs() << "Considering:\n");
+ DEBUG(dbgs() << " induction operation: " << *IOp);
+ DEBUG(dbgs() << " induction variable: " << *IV_Inst);
+ DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ bool SignedCmp;
+ MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ MI->getOperand(0).getReg() == PredReg) {
+
+ OldInsts.push_back(MI);
+ OldInsts.push_back(IOp);
+
+ DEBUG(dbgs() << " compare: " << *MI);
+
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+
+ int64_t ImmVal;
+ if (SignedCmp)
+ ImmVal = (short) MO.getImm();
+ else
+ ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occurs in loop");
+ int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
+
assert(InitialValue->isReg() && "Expecting register for init value");
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
-
+ unsigned InitialValueReg = InitialValue->getReg();
+
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = DefInstr->getOperand(2).getImm();
+ int64_t start = (short) DefInstr->getOperand(2).getImm();
const MachineInstr *DefInstr2 =
MRI->getVRegDef(DefInstr->getOperand(0).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
- start |= DefInstr2->getOperand(1).getImm() << 16;
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+ DEBUG(dbgs() << " initial constant: " << *DefInstr2);
+ start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
+
int64_t count = ImmVal - start;
if ((count % iv_value) != 0) {
return 0;
@@ -351,12 +388,23 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp,
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
- int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+ DEBUG(dbgs() << " initial constant: " << *DefInstr);
+
+ int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
return new CountValue(count/iv_value);
+ } else if (iv_value == 1 || iv_value == -1) {
+ // We can't determine a constant starting value.
+ if (ImmVal == 0) {
+ return new CountValue(InitialValueReg, iv_value > 0);
+ }
+ // FIXME: handle non-zero end value.
}
+ // FIXME: handle non-unit increments (we might not want to introduce division
+ // but we can handle some 2^n cases with shifts).
+
}
}
}
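A standalone sketch of the constant-trip-count arithmetic above; the casts
through short mirror the sign-extension of the 16-bit immediate fields
(values are illustrative):

    #include <cstdio>

    int main() {
      short CmpImm = 100;  // from 'cmpwi cr0, IV, 100'
      short Start  = 4;    // from 'li start, 4'
      short Step   = 8;    // from 'addi IV.next, IV, 8'
      long long Count = (long long)CmpImm - Start;         // 96
      if (Count % Step == 0)
        std::printf("trip count = %lld\n", Count / Step);  // 12
      return 0;
    }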
@@ -524,10 +572,9 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
return Changed;
}
- bool WordCmp;
SmallVector<MachineInstr *, 2> OldInsts;
// Are we able to determine the trip count for the loop?
- CountValue *TripCount = getTripCount(L, WordCmp, OldInsts);
+ CountValue *TripCount = getTripCount(L, OldInsts);
if (TripCount == 0) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
@@ -575,14 +622,21 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
bool isPPC64 = Subtarget.isPPC64();
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+
unsigned CountReg;
if (TripCount->isReg()) {
// Create a copy of the loop count register.
- const TargetRegisterClass *RC =
+ const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ (unsigned) PPC::EXTSW_32_64 :
+ (unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
- TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+ TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
if (TripCount->isNeg()) {
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
@@ -590,26 +644,12 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
CountReg).addReg(CountReg1);
}
-
- // On a 64-bit system, if the original comparison was only 32-bit, then
- // mask out the higher-order part of the count.
- if (isPPC64 && WordCmp) {
- unsigned CountReg1 = CountReg;
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(PPC::RLDICL), CountReg).addReg(CountReg1
- ).addImm(0).addImm(32);
- }
} else {
assert(TripCount->isImm() && "Expecting immediate value for trip count");
// Put the trip count in a register for transfer into the count register.
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
int64_t CountImm = TripCount->getImm();
- if (TripCount->isNeg())
- CountImm = -CountImm;
+ assert(!TripCount->isNeg() && "Constant trip count must be positive");
CountReg = MF->getRegInfo().createVirtualRegister(RC);
if (CountImm > 0xFFFF) {
@@ -665,6 +705,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
// Conditional branch; just delete it.
+ DEBUG(dbgs() << "Removing old branch: " << *LastI);
LastMBB->erase(LastI);
delete TripCount;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index be172c2435..c24afa908d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -368,9 +368,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(PPC::R0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
@@ -383,9 +383,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(PPC::R0);
}
} else { // PPC64.
@@ -401,9 +401,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
.addReg(PPC::X0)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(PPC::X0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
@@ -416,9 +416,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
.addReg(PPC::X0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(PPC::X0);
}
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b777f9313c..a00f686adc 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -111,6 +111,23 @@ namespace {
/// immediate field. Because preinc imms have already been validated, just
/// accept it.
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ N.getOpcode() == ISD::TargetGlobalAddress) {
+ Out = N;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc
+ /// index field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const {
+ if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false;
+
Out = N;
return true;
}
@@ -915,12 +932,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- // FIXME: PPC64
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
MVT::Other, Ops, 3);
} else {
- llvm_unreachable("R+R preindex loads not supported yet!");
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDUX; break;
+ case MVT::f32: Opcode = PPC::LFSUX; break;
+ case MVT::i32: Opcode = PPC::LWZUX; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
+ "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDUX; break;
+ case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
}
}
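A minimal model of the update-form loads selected here (illustrative; which
operand receives the updated address is fixed by the RegConstraint on each
instruction definition):

    #include <cstdint>

    // EA = base + index; load from EA; write EA back for the next access.
    uint32_t loadWordUpdate(const uint32_t *&Base, intptr_t Index) {
      const uint32_t *EA =
          (const uint32_t *)((const char *)Base + Index);
      Base = EA;   // the update: the effective address replaces the base
      return *EA;  // the loaded value
    }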
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 964d5a0d94..13250b33ea 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -66,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
setPow2DivIsCheap();
@@ -75,7 +76,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+ bool isPPC64 = Subtarget->isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
@@ -132,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FMA , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ if (!Subtarget->hasFSQRT()) {
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
}
@@ -228,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (Subtarget->isSVR4ABI()) {
+ if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
@@ -273,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (Subtarget->has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -292,7 +294,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
- if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
@@ -308,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ if (Subtarget->hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -378,6 +380,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -391,7 +394,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
+ if (Subtarget->has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
@@ -400,7 +403,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
setExceptionSelectorRegister(PPC::X4);
@@ -417,7 +420,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::BSWAP);
// Darwin long double math library functions have $LDBL128 appended.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -434,6 +437,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
if (PPCSubTarget.isDarwin())
setPrefFunctionAlignment(4);
+ if (isPPC64 && Subtarget->isJITCodeModel())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ setSupportJumpTables(false);
+
setInsertFencesForAtomic(true);
setSchedulingPreference(Sched::Hybrid);
@@ -1105,7 +1113,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (VT.isVector())
return false;
- // TODO: Check reg+reg first.
+ if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
// LDU/STU use reg+imm*4, others use reg+imm.
if (VT != MVT::i64) {
@@ -4933,11 +4944,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_CC_F4 ||
- MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+ if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
+ PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // The SelectPred is ((BI << 5) | BO) for a BCC
+ unsigned BO = SelectPred & 0xF;
+ assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
+
+ unsigned TrueOpNo, FalseOpNo;
+ if (BO == 12) {
+ TrueOpNo = 2;
+ FalseOpNo = 3;
+ } else {
+ TrueOpNo = 3;
+ FalseOpNo = 2;
+ SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ }
+
+ BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(TrueOpNo).getReg())
+ .addReg(MI->getOperand(FalseOpNo).getReg())
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
@@ -5873,6 +5910,26 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
+/// is expanded to mul + add.
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f32:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
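Why the hook matters, as a minimal standalone example: a fused multiply-add
rounds once while mul + add rounds twice, so the two can disagree in the last
bit. When this returns true (and FMA is legal for the type), @llvm.fmuladd
lowers to the fused form:

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.0 + 0x1p-52, b = 1.0 - 0x1p-52, c = -1.0;
      std::printf("fused: %g\n", std::fma(a, b, c));  // exact: -2^-104
      std::printf("split: %g\n", a * b + c);          // a*b rounds to 1 -> 0
      return 0;
    }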
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
if (DisableILPPref)
return TargetLowering::getSchedulingPreference(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 973800b461..b0a013b4b4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -366,6 +366,12 @@ namespace llvm {
bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
+ /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
+ /// FMAs when this method returns true (and FMAs are legal); otherwise fmuladd
+ /// is expanded to mul + add.
+ virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 562ae7da0b..a2bd55f533 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -497,6 +497,10 @@ def RLWINM8 : MForm_2<21,
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
[]>;
+def ISEL8 : AForm_1<31, 15,
+ (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
} // End FXU Operations.
@@ -533,6 +537,16 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp
NoEncode<"$ea_result">;
// NO LWAU!
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
}
// Zero extending loads.
@@ -572,6 +586,22 @@ def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
}
}
@@ -607,6 +637,11 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "ldux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
}
def : Pat<(PPCload ixaddr:$src),
@@ -680,10 +715,41 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
-let mayStore = 1 in
-def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
- "stdux $rS, $dst", LdStSTD,
- []>, isPPC64;
+
+def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti32 G8RC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
+ (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stdux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 6c0f3d3f06..b0b8423281 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB))]>,
- Requires<[FPContractions]>;
+ [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fsub V_immneg0,
- (fsub (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB)))]>,
- Requires<[FPContractions]>;
+ [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
+ (fneg VRRC:$vB))))]>;
def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 28b3bc1596..47f09dca77 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -79,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = PPC::sub_32;
+ return true;
+ }
+}
+
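A hedged sketch of the caller's shape (names assumed): a coalescing client
asks whether an instruction is a coalescable extension, and if so treats the
low sub-register of the destination as a copy of the source:

    unsigned SrcReg, DstReg, SubIdx;
    if (TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) {
      // DstReg:SubIdx (PPC::sub_32 here) carries the same value as SrcReg,
      // so reads of that sub-register can be rewritten to read SrcReg.
    }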
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7d49aa129e..374213ea43 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -92,6 +92,9 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const;
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e4af8846df..9b390461d8 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -323,7 +323,7 @@ def memri : Operand<iPTR> {
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc, ptr_rc);
+ let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
@@ -349,10 +349,10 @@ def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
@@ -711,6 +711,44 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
"lfd $rD, $addr", LdStLFD,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
+
+
+// Indexed (r+r) Loads with Update (preinc).
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lfsux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+ (ins memrr:$addr),
+ "lfdux $rD, $addr", LdStLoad,
+ []>, RegConstraint<"$addr.offreg = $ea_result">,
+ NoEncode<"$ea_result">;
}
}
@@ -822,12 +860,49 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-
-let mayStore = 1 in {
-def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
- "stwux $rS, $rA, $rB", LdStStore,
- []>;
-}
+
+def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stbux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "sthux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS,
+ ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
+ (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stwux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
+ (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stfsux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
+def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
+ (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
+ "stfdux $rS, $ptroff, $ptrreg", LdStStore,
+ [(set ptr_rc:$ea_res,
+ (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
+ RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
@@ -1236,51 +1311,43 @@ let Uses = [RM] in {
def FMADD : AForm_1<63, 29,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
def FMADDS : AForm_1<59, 29,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
def FMSUB : AForm_1<63, 28,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
def FMSUBS : AForm_1<59, 28,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
def FNMADD : AForm_1<63, 31,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT,
+ (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
def FNMADDS : AForm_1<59, 31,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT,
+ (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
def FNMSUB : AForm_1<63, 30,
(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
- [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
+ (fneg F8RC:$FRB))))]>;
def FNMSUBS : AForm_1<59, 30,
(outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>,
- Requires<[FPContractions]>;
+ [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
+ (fneg F4RC:$FRB))))]>;
}
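A hedged sanity check of the rewritten patterns (plain fma identities, not
PPC-specific; the equalities are exact for these operands):

    #include <cassert>
    #include <cmath>
    int main() {
      double a = 1.5, c = 2.0, b = 0.25;
      double fmsub  = std::fma(a, c, -b);   //  a*c - b
      double fnmadd = -std::fma(a, c, b);   // -(a*c + b)
      double fnmsub = -std::fma(a, c, -b);  // -(a*c - b) == b - a*c
      assert(fmsub == a * c - b);
      assert(fnmadd == -(a * c + b));
      assert(fnmsub == b - a * c);
      return 0;
    }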
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1331,6 +1398,13 @@ let Uses = [RM] in {
}
let PPC970_Unit = 1 in { // FXU Operations.
+ def ISEL : AForm_1<31, 15,
+ (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ "isel $rT, $rA, $rB, $cond", IntGeneral,
+ []>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
// M-Form instructions. rotate and mask instructions.
//
let isCommutable = 1 in {
@@ -1441,14 +1515,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS GPRC:$in, tblockaddress:$g)>;
-// Fused negative multiply subtract, alternate pattern
-def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
- (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
- Requires<[FPContractions]>;
-def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
- (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
- Requires<[FPContractions]>;
-
// Standard shifts. These are represented separately from the real shifts above
// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
// amounts.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index ecb8754cbc..ab8bf1f93a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -199,6 +199,20 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
+bool
+PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ switch (RC->getID()) {
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID:
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return true;
+ default:
+ return false;
+ }
+}
+
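A hedged illustration of the hazard this hook flags (the assembly in the
comments is schematic): two independent back-to-back definitions of the same
register serialize on some cores, so the scheduler should prefer distinct
registers for classes where the hook returns true:

    const TargetRegisterClass *RC = &PPC::F8RCRegClass; // one flagged class
    if (TRI->avoidWriteAfterWrite(RC)) {
      //   fadd f1, f2, f3
      //   fadd f1, f4, f5   <- WAW on f1; pick another register if possible
    }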
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -328,14 +342,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// address of new allocated space.
if (LP64) {
if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(Reg, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(MI.getOperand(1).getReg());
else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1, RegState::Define)
+ .addReg(PPC::X1)
.addReg(MI.getOperand(1).getReg());
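A hedged note on the BuildMI change above: passing a register immediately
after the instruction descriptor adds it as the explicit def (operand 0), so
the updated stack pointer no longer needs a manually flagged
RegState::Define operand:

    BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) // X1 is the def
        .addReg(Reg, RegState::Kill)                   // value stored
        .addReg(PPC::X1)                               // base pointer (use)
        .addReg(OffsetReg);                            // offset (assumed name)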
if (!MI.getOperand(1).isKill())
@@ -349,9 +363,9 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
.addImm(maxCallFrameSize)
.addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
} else {
- BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(Reg, RegState::Kill)
- .addReg(PPC::R1, RegState::Define)
+ .addReg(PPC::R1)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 78e17c6890..152c36d699 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -47,6 +47,8 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
/// requiresRegisterScavenging - We require a register scavenger.
/// FIXME (64-bit): Should be inlined.
bool requiresRegisterScavenging(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c085ba26dd..bb193ac3d9 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -38,6 +38,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasAltivec(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , HasISEL(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 7d9be55713..0207c83393 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -70,6 +70,7 @@ protected:
bool HasAltivec;
bool HasFSQRT;
bool HasSTFIWX;
+ bool HasISEL;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -141,6 +142,7 @@ public:
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
bool hasMFOCRF() const { return HasMFOCRF; }
+ bool hasISEL() const { return HasISEL; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index d7a808855b..980511268a 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -98,31 +98,25 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
bool PPCPassConfig::addPreRegAlloc() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- PM->add(createPPCCTRLoops());
+ addPass(createPPCCTRLoops());
return false;
}
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createPPCISelDag(getPPCTargetMachine()));
+ addPass(createPPCISelDag(getPPCTargetMachine()));
return false;
}
bool PPCPassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- PM->add(createPPCBranchSelectionPass());
+ addPass(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
- // FIXME: This should be moved to TargetJITInfo!!
- if (Subtarget.isPPC64())
- // Temporary workaround for the inability of PPC64 JIT to handle jump
- // tables.
- Options.DisableJumpTables = true;
-
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
Subtarget.SetJITMode();
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 093255e6af..cbfa4cf35b 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -964,6 +964,12 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
//===---------------------------------------------------------------------===//
+unsigned f(unsigned x) { return ((x & 7) + 1) & 15; }
+The & 15 part should be optimized away: (x & 7) + 1 is at most 8, so masking
+with 15 doesn't change the result. Currently not optimized with
+"clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
This was noticed in the entryblock for grokdeclarator in 403.gcc:
%tmp = icmp eq i32 %decl_context, 4
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index ae4af0f442..efb10db4c0 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -23,5 +23,7 @@ add_llvm_target(SparcCodeGen
SparcSelectionDAGInfo.cpp
)
+add_dependencies(LLVMSparcCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index c14b3d4a00..25548625e7 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -187,7 +187,9 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand.
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'r':
break;
}
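A hedged example of a generic modifier the base class can now handle for
Sparc (GCC-style inline asm; the generic 'c' modifier prints an immediate
without target punctuation, and '!' is the Sparc assembly comment character):

    void emit_const(void) {
      asm volatile("! const: %c0" :: "i"(42));
    }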
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 77fd2af88d..9ee12ed7f5 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -60,7 +60,7 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool SparcPassConfig::addInstSelector() {
- PM->add(createSparcISelDag(getSparcTargetMachine()));
+ addPass(createSparcISelDag(getSparcTargetMachine()));
return false;
}
@@ -68,8 +68,8 @@ bool SparcPassConfig::addInstSelector() {
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
bool SparcPassConfig::addPreEmitPass(){
- PM->add(createSparcFPMoverPass(getSparcTargetMachine()));
- PM->add(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
+ addPass(createSparcFPMoverPass(getSparcTargetMachine()));
+ addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
return true;
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index e785d330ae..ffc1d9f0d1 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,7 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -90,26 +92,59 @@ CodeModel::Model TargetMachine::getCodeModel() const {
return CodeGenInfo->getCodeModel();
}
+/// Get the IR-specified TLS model for Var.
+static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) {
+ switch (Var->getThreadLocalMode()) {
+ case GlobalVariable::NotThreadLocal:
+ llvm_unreachable("getSelectedTLSModel for non-TLS variable");
+ case GlobalVariable::GeneralDynamicTLSModel:
+ return TLSModel::GeneralDynamic;
+ case GlobalVariable::LocalDynamicTLSModel:
+ return TLSModel::LocalDynamic;
+ case GlobalVariable::InitialExecTLSModel:
+ return TLSModel::InitialExec;
+ case GlobalVariable::LocalExecTLSModel:
+ return TLSModel::LocalExec;
+ }
+ llvm_unreachable("invalid TLS model");
+}
+
TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
- bool isLocal = GV->hasLocalLinkage();
- bool isDeclaration = GV->isDeclaration();
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+ const GlobalVariable *Var = cast<GlobalVariable>(GV);
+
+ bool isLocal = Var->hasLocalLinkage();
+ bool isDeclaration = Var->isDeclaration();
+ bool isPIC = getRelocationModel() == Reloc::PIC_;
+ bool isPIE = Options.PositionIndependentExecutable;
// FIXME: what should we do for protected and internal visibility?
// For variables, is internal different from hidden?
- bool isHidden = GV->hasHiddenVisibility();
+ bool isHidden = Var->hasHiddenVisibility();
- if (getRelocationModel() == Reloc::PIC_ &&
- !ForceTLSNonPIC && // @LOCALMOD
- !Options.PositionIndependentExecutable) {
+ TLSModel::Model Model;
+ if (isPIC && !isPIE &&
+ !ForceTLSNonPIC) { // @LOCALMOD
if (isLocal || isHidden)
- return TLSModel::LocalDynamic;
+ Model = TLSModel::LocalDynamic;
else
- return TLSModel::GeneralDynamic;
+ Model = TLSModel::GeneralDynamic;
} else {
if (!isDeclaration || isHidden)
- return TLSModel::LocalExec;
+ Model = TLSModel::LocalExec;
else
- return TLSModel::InitialExec;
+ Model = TLSModel::InitialExec;
}
+
+ // If the user specified a more specific model, use that.
+ TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
+ if (SelectedModel > Model)
+ return SelectedModel;
+
+ return Model;
}
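A hedged source-level example of the override this honors (the attribute is
standard GCC/Clang and lands in GlobalVariable::getThreadLocalMode()):

    __thread int counter __attribute__((tls_model("initial-exec")));
    // Under -fPIC the default choice would be general-dynamic; because
    // initial-exec is more specific, getTLSModel() now returns it instead.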
/// getOptLevel - Returns the optimization level: None, Less,
@@ -143,4 +178,3 @@ void TargetMachine::setFunctionSections(bool V) {
void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 08c732c388..417842b467 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -187,7 +187,7 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
-
+
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
virtual void print(raw_ostream &OS) const {}
@@ -309,25 +309,25 @@ struct X86Operand : public MCParsedAsmOperand {
}
bool isMem() const { return Kind == Memory; }
- bool isMem8() const {
+ bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
}
- bool isMem16() const {
+ bool isMem16() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 16);
}
- bool isMem32() const {
+ bool isMem32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32);
}
- bool isMem64() const {
+ bool isMem64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64);
}
- bool isMem80() const {
+ bool isMem80() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 80);
}
- bool isMem128() const {
+ bool isMem128() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 128);
}
- bool isMem256() const {
+ bool isMem256() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 256);
}
@@ -356,26 +356,26 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
- void addMem8Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem8Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem16Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem16Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem80Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem80Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem128Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem128Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
- void addMem256Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addMem256Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -467,7 +467,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() &&
+ return Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
@@ -611,7 +611,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
Parser.Lex();
-
+
if (getLexer().is(AsmToken::Identifier)) {
// Parse BaseReg
if (ParseRegister(BaseReg, Start, End)) {
@@ -668,7 +668,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
End = Parser.getTok().getLoc();
if (!IndexReg)
ParseRegister(IndexReg, Start, End);
- else if (getParser().ParseExpression(Disp, End)) return 0;
+ else if (getParser().ParseExpression(Disp, End)) return 0;
}
}
@@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we have both a base register and an index register make sure they are
// both 64-bit or 32-bit registers.
+ // To support VSIB, IndexReg can also be a 128-bit or 256-bit register.
if (BaseReg != 0 && IndexReg != 0) {
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
- !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
IndexReg != X86::RIZ) {
Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
return 0;
}
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
- !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
IndexReg != X86::EIZ){
Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
return 0;
@@ -944,7 +947,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
-
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -1217,7 +1220,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
}
-
+
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
@@ -1520,7 +1523,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
- // individual transformations can chain off each other.
+ // individual transformations can chain off each other.
while (processInstruction(Inst, Operands))
;
@@ -1558,12 +1561,12 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// Otherwise, we assume that this may be an integer instruction, which comes
// in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
-
+
// Check for the various suffix matches.
Tmp[Base.size()] = Suffixes[0];
unsigned ErrorInfoIgnore;
unsigned Match1, Match2, Match3, Match4;
-
+
Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
Tmp[Base.size()] = Suffixes[1];
Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
@@ -1691,19 +1694,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
const MCExpr *Value;
if (getParser().ParseExpression(Value))
return true;
-
+
getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-
+
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
Parser.Lex();
}
}
-
+
Parser.Lex();
return false;
}
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 5b402da3ad..45fd42f205 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -53,6 +53,8 @@ endif()
add_llvm_target(X86CodeGen ${sources})
+add_dependencies(LLVMX86CodeGen intrinsics_gen)
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index b13a00620b..d58e36c803 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -498,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
} else {
baseReg = MCOperand::CreateReg(0);
}
-
+
+ // Check whether we are handling VSIB addressing mode for GATHER.
+ // If sibIndex was set to SIB_INDEX_NONE, the index offset is 4, and
+ // we should use SIB_INDEX_XMM4/YMM4 for VSIB.
+ // There is no way to get the correct IndexReg in readSIB itself:
+ // whether this is VSIB or plain SIB can only be told after the
+ // instruction ID is decoded, and the ID may not be decoded yet when
+ // readSIB is called.
+ uint32_t Opcode = mcInst.getOpcode();
+ bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
+ Opcode == X86::VGATHERDPDYrm ||
+ Opcode == X86::VGATHERQPDrm ||
+ Opcode == X86::VGATHERDPSrm ||
+ Opcode == X86::VGATHERQPSrm ||
+ Opcode == X86::VPGATHERDQrm ||
+ Opcode == X86::VPGATHERDQYrm ||
+ Opcode == X86::VPGATHERQQrm ||
+ Opcode == X86::VPGATHERDDrm ||
+ Opcode == X86::VPGATHERQDrm);
+ bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
+ Opcode == X86::VGATHERDPSYrm ||
+ Opcode == X86::VGATHERQPSYrm ||
+ Opcode == X86::VPGATHERQQYrm ||
+ Opcode == X86::VPGATHERDDYrm ||
+ Opcode == X86::VPGATHERQDYrm);
+ if (IndexIs128 || IndexIs256) {
+ unsigned IndexOffset = insn.sibIndex -
+ (insn.addressSize == 8 ? SIB_INDEX_RAX : SIB_INDEX_EAX);
+ SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+ insn.sibIndex = (SIBIndex)(IndexBase +
+ (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
+ }
+
if (insn.sibIndex != SIB_INDEX_NONE) {
switch (insn.sibIndex) {
default:
@@ -509,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
indexReg = MCOperand::CreateReg(X86::x); break;
EA_BASES_32BIT
EA_BASES_64BIT
+ REGS_XMM
+ REGS_YMM
#undef ENTRY
}
} else {
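A hedged user-level example of a VSIB-addressed instruction handled above
(AVX2 gather via <immintrin.h>; vgatherdpd with a YMM destination and an XMM
index register, i.e. the IndexIs128 case):

    #include <immintrin.h>
    __m256d gather4(const double *base, __m128i vindex) {
      return _mm256_i32gather_pd(base, vindex, /*scale=*/8);
    }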
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index fae309b45d..e2caf6a2a8 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -310,11 +310,14 @@ typedef enum {
* SIBIndex - All possible values of the SIB index field.
* Borrows entries from ALL_EA_BASES with the special case that
* sib is synonymous with NONE.
+ * Vector SIB: index can be XMM or YMM.
*/
typedef enum {
SIB_INDEX_NONE,
#define ENTRY(x) SIB_INDEX_##x,
ALL_EA_BASES
+ REGS_XMM
+ REGS_YMM
#undef ENTRY
SIB_INDEX_max
} SIBIndex;
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c3f46ebda0..b0e5be3162 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -483,17 +483,17 @@ namespace X86II {
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
- static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
- static inline bool hasImm(uint64_t TSFlags) {
+ inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
- static inline unsigned getSizeOfImm(uint64_t TSFlags) {
+ inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8:
@@ -508,7 +508,7 @@ namespace X86II {
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(uint64_t TSFlags) {
+ inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8PCRel:
@@ -531,7 +531,7 @@ namespace X86II {
/// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
/// counted as one operand.
///
- static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
+ inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
// FIXME: Remove this form.
@@ -594,7 +594,7 @@ namespace X86II {
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
- static inline bool isX86_64ExtendedReg(unsigned RegNo) {
+ inline bool isX86_64ExtendedReg(unsigned RegNo) {
switch (RegNo) {
default: break;
case X86::R8: case X86::R9: case X86::R10: case X86::R11:
@@ -616,7 +616,7 @@ namespace X86II {
return false;
}
- static inline bool isX86_64NonExtLowByteReg(unsigned reg) {
+ inline bool isX86_64NonExtLowByteReg(unsigned reg) {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 61e2fdcb62..7f7873acd1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ // Instruction format for 4VOp3:
+ // src1(ModR/M), MemAddr, src3(VEX_4V)
+ // CurOp points to the start of the memory operand: it has already
+ // skipped any TIED_TO operands and been incremented past src1, so
+ // CurOp + X86::AddrNumOperands points to src3.
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
break;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index ee66e7ce1c..599c8f8c6d 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,10 +20,10 @@
#include "X86TargetMachine.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -436,7 +436,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand.
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
if (MO.isImm()) {
O << MO.getImm();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index af9efbd906..e263e44f40 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -52,7 +52,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit());
+ MMI.callsUnwindInit() || MMI.callsEHReturn());
}
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
@@ -652,7 +652,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- unsigned BasePtr = RegInfo->getBaseRegister();
DebugLoc DL;
// If we're forcing a stack realignment we can't rely on just the frame
@@ -916,18 +915,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
UseLEA, TII, *RegInfo);
- // If we need a base pointer, set it up here. It's whatever the value
- // of the stack pointer is at this point. Any variable size objects
- // will be allocated after this, so we can still use the base pointer
- // to reference locals.
- if (RegInfo->hasBasePointer(MF)) {
- // Update the frame pointer with the current stack pointer.
- unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
- BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
@@ -1184,16 +1171,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
uint64_t StackSize = MFI->getStackSize();
- if (RegInfo->hasBasePointer(MF)) {
- assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
- if (FI < 0) {
- // Skip the saved EBP.
- return Offset + RegInfo->getSlotSize();
- } else {
- assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
- return Offset + StackSize;
- }
- } else if (RegInfo->needsStackRealignment(MF)) {
+ if (RegInfo->needsStackRealignment(MF)) {
if (FI < 0) {
// Skip the saved EBP.
return Offset + RegInfo->getSlotSize();
@@ -1224,14 +1202,9 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
// We can't calculate offset from frame pointer if the stack is realigned,
- // so enforce usage of stack/base pointer. The base pointer is used when we
- // have dynamic allocas in addition to dynamic realignment.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else if (RegInfo->needsStackRealignment(MF))
- FrameReg = RegInfo->getStackRegister();
- else
- FrameReg = RegInfo->getFrameRegister(MF);
+ // so enforce usage of stack pointer.
+ FrameReg = (RegInfo->needsStackRealignment(MF)) ?
+ RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
return getFrameIndexOffset(MF, FI);
}
@@ -1368,10 +1341,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
"Slot for EBP register must be last in order to be found!");
(void)FrameIdx;
}
-
- // Spill the BasePtr if it's used.
- if (RegInfo->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}
static bool
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index c8ff1cf0d0..2871a790c6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -188,6 +188,7 @@ namespace {
private:
SDNode *Select(SDNode *N);
+ SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
@@ -2165,6 +2166,30 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+ // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+ SDValue Chain = Node->getOperand(0);
+ SDValue VSrc = Node->getOperand(2);
+ SDValue Base = Node->getOperand(3);
+ SDValue VIdx = Node->getOperand(4);
+ SDValue VMask = Node->getOperand(5);
+ ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+ if (!Scale)
+ return 0;
+
+ // Memory Operands: Base, Scale, Index, Disp, Segment
+ SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+ const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+ Disp, Segment, VMask, Chain};
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+ VSrc.getValueType(), MVT::Other,
+ Ops, array_lengthof(Ops));
+ return ResNode;
+}
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
@@ -2180,23 +2205,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_avx2_gather_d_pd:
+ case Intrinsic::x86_avx2_gather_d_pd_256:
+ case Intrinsic::x86_avx2_gather_q_pd:
+ case Intrinsic::x86_avx2_gather_q_pd_256:
+ case Intrinsic::x86_avx2_gather_d_ps:
+ case Intrinsic::x86_avx2_gather_d_ps_256:
+ case Intrinsic::x86_avx2_gather_q_ps:
+ case Intrinsic::x86_avx2_gather_q_ps_256:
+ case Intrinsic::x86_avx2_gather_d_q:
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ case Intrinsic::x86_avx2_gather_q_q:
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ case Intrinsic::x86_avx2_gather_d_d:
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ case Intrinsic::x86_avx2_gather_q_d:
+ case Intrinsic::x86_avx2_gather_q_d_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
+ case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+ case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
+ case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
+ case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
+ case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
+ case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
+ case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
+ case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
+ }
+ SDNode *RetVal = SelectGather(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+ }
+ break;
+ }
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+
case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
+ case X86ISD::ATOMSWAP64_DAG: {
+ unsigned Opc;
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
+ case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
+ case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
+ case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
+ case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+ case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+ }
+ SDNode *RetVal = SelectAtomic64(Node, Opc);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bf559c98dd..4197c35adb 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -99,6 +99,10 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
unsigned IdxVal, SelectionDAG &DAG,
DebugLoc dl) {
+ // Inserting an UNDEF vector leaves Result unchanged.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+
EVT VT = Vec.getValueType();
assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
@@ -114,9 +118,8 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
* ElemsPerChunk);
SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
- Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
- VecIdx);
- return Result;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
}
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
@@ -136,10 +139,13 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
if (Subtarget->isTargetEnvMacho()) {
if (is64Bit)
- return new X8664_MachoTargetObjectFile();
+ return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
}
+ if (Subtarget->isTargetLinux())
+ return new X86LinuxTargetObjectFile();
+
// @LOCALMOD-BEGIN
if (Subtarget->isTargetNaCl())
return new TargetLoweringObjectFileNaCl();
@@ -3536,6 +3542,52 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
return true;
}
+//
+// Some special combinations that can be optimized.
+//
+static
+SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ EVT VT = SVOp->getValueType(0);
+ DebugLoc dl = SVOp->getDebugLoc();
+
+ if (VT != MVT::v8i32 && VT != MVT::v8f32)
+ return SDValue();
+
+ ArrayRef<int> Mask = SVOp->getMask();
+
+ // These are the special masks that may be optimized.
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ bool MatchEvenMask = true;
+ bool MatchOddMask = true;
+ for (int i = 0; i < 8; ++i) {
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i]))
+ MatchEvenMask = false;
+ if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i]))
+ MatchOddMask = false;
+ }
+ static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1};
+ static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1};
+
+ const int *CompactionMask;
+ if (MatchEvenMask)
+ CompactionMask = CompactionMaskEven;
+ else if (MatchOddMask)
+ CompactionMask = CompactionMaskOdd;
+ else
+ return SDValue();
+
+ SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
+
+ SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0),
+ UndefNode, CompactionMask);
+ SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1),
+ UndefNode, CompactionMask);
+ static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13};
+ return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask);
+}
+
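A hedged worked example of the even case, writing A = a0..a7, B = b0..b7:
compaction gives Op0 = a0 a2 _ _ a4 a6 _ _ and Op1 = b0 b2 _ _ b4 b6 _ _,
and the unpack mask {0,8,1,9,4,12,5,13} interleaves them into
a0 b0 a2 b2 a4 b4 a6 b6 -- exactly the original mask {0,8,2,10,4,12,6,14}
applied to (A, B), at the cost of two cheap in-lane compactions plus one
unpack instead of a single expensive cross-lane shuffle.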
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
@@ -5041,8 +5093,16 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
SDValue Sc = Op.getOperand(0);
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
- Sc.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
+ Sc.getOpcode() != ISD::BUILD_VECTOR) {
+
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+
+ // Use the register form of the broadcast instruction available on AVX2.
+ if (VT.is256BitVector())
+ Sc = Extract128BitVector(Sc, 0, DAG, dl);
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+ }
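A hedged source-level example of the new register-form path (AVX2 via
<immintrin.h>; this compiles to vbroadcastss ymm, xmm, broadcasting from a
register rather than from memory):

    #include <immintrin.h>
    __m256 splat_lane0(__m128 v) {
      return _mm256_broadcastss_ps(v);
    }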
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
@@ -6022,6 +6082,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
/// which could not be matched by any known target specific shuffle
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+
+ SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+
EVT VT = SVOp->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
@@ -7504,11 +7569,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->resolveAliasedGlobal(false);
-
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
// @LOCALMOD-START
@@ -9995,7 +10055,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
}
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
@@ -10015,7 +10074,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
- MF.getRegInfo().addLiveOut(StoreAddrReg);
return DAG.getNode(X86ISD::EH_RETURN, dl,
MVT::Other,
@@ -16240,12 +16298,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// wrong class. This can happen with constraints like {xmm0} where the
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
- if (VT == MVT::f32)
+
+ if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
- else if (VT == MVT::f64)
+ else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
else if (X86::VR128RegClass.hasType(VT))
Res.second = &X86::VR128RegClass;
+ else if (X86::VR256RegClass.hasType(VT))
+ Res.second = &X86::VR256RegClass;
}
return Res;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index fa1d67644d..aaef4a466d 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -55,11 +55,11 @@ struct X86AddressMode {
: BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
Base.Reg = 0;
}
-
-
+
+
void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
-
+
if (BaseType == X86AddressMode::RegBase)
MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
false, false, false, 0, false));
@@ -67,16 +67,16 @@ struct X86AddressMode {
assert(BaseType == X86AddressMode::FrameIndexBase);
MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
}
-
+
MO.push_back(MachineOperand::CreateImm(Scale));
MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
false, false, false, 0, false));
-
+
if (GV)
MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
else
MO.push_back(MachineOperand::CreateImm(Disp));
-
+
MO.push_back(MachineOperand::CreateReg(0, false, false,
false, false, false, 0, false));
}
@@ -122,7 +122,7 @@ static inline const MachineInstrBuilder &
addFullAddress(const MachineInstrBuilder &MIB,
const X86AddressMode &AM) {
assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
-
+
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
else {
@@ -135,7 +135,7 @@ addFullAddress(const MachineInstrBuilder &MIB,
MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
else
MIB.addImm(AM.Disp);
-
+
return MIB.addReg(0);
}
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cbec891d7e..bebe5f033c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -367,6 +367,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDI - SSE2 instructions with XS prefix.
// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index dabb181cce..cb926f63a4 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -55,39 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load",
enum {
// Select which memory operand is being unfolded.
- // (stored in bits 0 - 7)
+ // (stored in bits 0 - 3)
TB_INDEX_0 = 0,
TB_INDEX_1 = 1,
TB_INDEX_2 = 2,
TB_INDEX_3 = 3,
- TB_INDEX_MASK = 0xff,
-
- // Minimum alignment required for load/store.
- // Used for RegOp->MemOp conversion.
- // (stored in bits 8 - 15)
- TB_ALIGN_SHIFT = 8,
- TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
- TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
- TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT,
+ TB_INDEX_MASK = 0xf,
// Do not insert the reverse map (MemOp -> RegOp) into the table.
// This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 16,
+ TB_NO_REVERSE = 1 << 4,
// Do not insert the forward map (RegOp -> MemOp) into the table.
// This is needed for Native Client, which prohibits branch
// instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 17,
+ TB_NO_FORWARD = 1 << 5,
- TB_FOLDED_LOAD = 1 << 18,
- TB_FOLDED_STORE = 1 << 19
+ TB_FOLDED_LOAD = 1 << 6,
+ TB_FOLDED_STORE = 1 << 7,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion.
+ // (stored in bits 8 - 15)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
struct X86OpTblEntry {
uint16_t RegOp;
uint16_t MemOp;
- uint32_t Flags;
+ uint16_t Flags;
};
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
@@ -415,14 +415,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
{ X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
@@ -499,14 +495,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
- { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 },
- { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 },
{ X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
{ X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
{ X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
@@ -815,17 +817,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
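
The payoff of the enum reshuffle at the top of this hunk: the unfold index now needs only bits 0-3, the four booleans move down to bits 4-7, and the alignment byte keeps bits 8-15, so every fold-table flag fits in 16 bits and the Flags field of X86OpTblEntry shrinks to uint16_t. A self-contained sketch of the packing arithmetic (it mirrors the constants above but is standalone, not LLVM API):

#include <cassert>
#include <cstdint>

enum : uint16_t {
  IDX_MASK     = 0xf,      // bits 0-3: which memory operand is unfolded
  NO_REVERSE   = 1 << 4,   // bits 4-7: boolean flags
  NO_FORWARD   = 1 << 5,
  FOLDED_LOAD  = 1 << 6,
  FOLDED_STORE = 1 << 7,
  ALIGN_SHIFT  = 8,        // bits 8-15: required alignment, in bytes
};

int main() {
  uint16_t Flags = 2 | FOLDED_LOAD | (16 << ALIGN_SHIFT); // index 2, align 16
  assert((Flags & IDX_MASK) == 2);       // unfold index decodes back out
  assert((Flags & FOLDED_LOAD) != 0);    // boolean flag survives packing
  assert((Flags >> ALIGN_SHIFT) == 16);  // alignment byte decodes back out
  return 0;
}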
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5ae6b99e5a..4006dad684 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -145,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo {
std::pair<unsigned, unsigned> > MemOp2RegOpTableType;
MemOp2RegOpTableType MemOp2RegOpTable;
- void AddTableEntry(RegOp2MemOpTableType &R2MTable,
- MemOp2RegOpTableType &M2RTable,
- unsigned RegOp, unsigned MemOp, unsigned Flags);
+ static void AddTableEntry(RegOp2MemOpTableType &R2MTable,
+ MemOp2RegOpTableType &M2RTable,
+ unsigned RegOp, unsigned MemOp, unsigned Flags);
public:
explicit X86InstrInfo(X86TargetMachine &tm);
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 892115b77e..0edd10a355 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -333,6 +333,12 @@ def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
+def v128mem : X86MemOperand<"printf128mem"> {
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def v256mem : X86MemOperand<"printf256mem"> {
+ let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+ let ParserMatchClass = X86Mem256AsmOperand; }
}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 56542494b2..5319455dc5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1418,10 +1418,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d, OpndItins itins> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
itins.rr, d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
itins.rm, d>;
}
@@ -1622,7 +1622,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
"cvttsd2si{q}", SSE_CVT_SD2SI>,
XD, REX_W;
-let Pattern = []<dag> in {
+let Pattern = []<dag>, neverHasSideEffects = 1 in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
"cvtss2si{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
@@ -1630,14 +1630,16 @@ defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
"cvtss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
+ Requires<[HasAVX]>;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX,
+ Requires<[HasAVX]>;
}
-let Pattern = []<dag> in {
+let Pattern = []<dag>, neverHasSideEffects = 1 in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>, XS;
@@ -1646,8 +1648,8 @@ defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle, SSE_CVT_PS>,
- TB; /* PD SSE3 form is avaiable */
+ SSEPackedSingle, SSE_CVT_PS>, TB,
+ Requires<[HasSSE2]>;
}
let Predicates = [HasAVX] in {
@@ -1788,56 +1790,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
Requires<[HasSSE2]>;
}
-// Convert doubleword to packed single/double fp
-// SSE2 instructions without OpSize prefix
-def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PS_RM>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- TB, Requires<[HasSSE2]>;
-def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PS_RM>,
- TB, Requires<[HasSSE2]>;
-
-// SSE2 instructions with XS prefix
-def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XS, Requires<[HasSSE2]>;
-def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>,
- XS, Requires<[HasSSE2]>;
-
-
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", [],
@@ -1858,51 +1810,63 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PS_RM>;
-def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>,
- VEX;
-def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
-def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
-def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
-
-// SSE2 packed instructions with XD prefix
-def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XD, VEX, Requires<[HasAVX]>;
-def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- XD, VEX, Requires<[HasAVX]>;
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- XD, Requires<[HasSSE2]>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
+ (VCVTPS2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
+ (VCVTPS2DQrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtps2dq VR128:$src),
+ (CVTPS2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)),
+ (CVTPS2DQrm addr:$src)>;
+}
+
+// Convert Packed Double FP to Packed DW Integers
+let Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
+def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
+}
+
+def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
+def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
+ (VCVTPD2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
+ (VCVTPD2DQXrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src),
+ (CVTPD2DQrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)),
+ (CVTPD2DQrm addr:$src)>;
+}
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
@@ -1914,7 +1878,7 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))],
+ (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1935,14 +1899,19 @@ def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memop addr:$src)))],
+ (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_VCVTDQ2PSrr VR128:$src)>;
+ (VCVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (Int_VCVTDQ2PSrm addr:$src)>;
+ (VCVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
@@ -1962,9 +1931,14 @@ let Predicates = [HasAVX] in {
let Predicates = [HasSSE2] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>;
+ (CVTDQ2PSrr VR128:$src)>;
def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (Int_CVTDQ2PSrm addr:$src)>;
+ (CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+ (CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ (CVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>;
@@ -1977,12 +1951,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
IIC_SSE_CVT_PD_RR>, VEX;
-let isCodeGenOnly = 1 in
-def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
@@ -1990,31 +1959,38 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))],
+ (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
-def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
+
+let Predicates = [HasAVX] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ (VCVTTPD2DQYrm addr:$src)>;
+} // Predicates = [HasAVX]
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2032,35 +2008,71 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB, VEX;
}
+
+let Predicates = [HasSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB;
+}
+
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
+ (VCVTPS2PDrr VR128:$src)>;
+}
-def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- TB, VEX, Requires<[HasAVX]>;
-def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>,
- TB, Requires<[HasSSE2]>;
-def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- TB, Requires<[HasSSE2]>;
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtps2pd VR128:$src),
+ (CVTPS2PDrr VR128:$src)>;
+}
+
+// Convert Packed DW Integers to Packed Double FP
+let Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
+def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
+
+// 128 bit register conversion intrinsics
+let Predicates = [HasAVX] in
+def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
+ (VCVTDQ2PDrr VR128:$src)>;
+
+let Predicates = [HasSSE2] in
+def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src),
+ (CVTDQ2PDrr VR128:$src)>;
+
+// AVX 256-bit register conversion intrinsics
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+ def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
+ def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
+ (VCVTDQ2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (VCVTDQ2PDYrm addr:$src)>;
+} // Predicates = [HasAVX]
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -2069,25 +2081,24 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX;
-def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
-def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>, VEX;
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>;
@@ -2096,64 +2107,60 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
IIC_SSE_CVT_PD_RM>;
-def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
-def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
-def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
+ (VCVTPD2PSXrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)),
+ (CVTPD2PSrm addr:$src)>;
+}
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
-def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
- (VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
- (VCVTDQ2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
- (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
- (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
- (VCVTPS2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTPS2DQYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
- (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
- (VCVTPS2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
- (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTTPD2DQYrm addr:$src)>;
-
-// Match fround and fextend for 128/256-bit conversions
-def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
- (VCVTPD2PSYrr VR256:$src)>;
-def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
- (VCVTPD2PSYrm addr:$src)>;
-
-def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
- (VCVTPS2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
- (VCVTPS2PDYrm addr:$src)>;
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+ def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+ def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+ def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+ // Match fround and fextend for 128/256-bit conversions
+ def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
+
+ def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
+ (VCVTPS2PDYrr VR128:$src)>;
+ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+ (VCVTPS2PDYrm addr:$src)>;
+}
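+
+// The fround/fextend patterns closing this block are the ISel face of the
+// ordinary AVX narrowing/widening conversions; in intrinsics terms the
+// round trip they cover is simply (illustrative user code, compiled with
+// -mavx, not part of this patch):
+//
+//   #include <immintrin.h>
+//
+//   // 4 x double -> 4 x float: fround, selected to VCVTPD2PSYrr.
+//   __m128 narrow(__m256d v) { return _mm256_cvtpd_ps(v); }
+//
+//   // 4 x float -> 4 x double: fextend, selected to VCVTPS2PDYrr.
+//   __m256d widen(__m128 v)  { return _mm256_cvtps_pd(v); }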
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
@@ -4889,80 +4896,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
//===---------------------------------------------------------------------===//
-// SSE3 - Conversion Instructions
-//===---------------------------------------------------------------------===//
-
-// Convert Packed Double FP to Packed DW Integers
-let Predicates = [HasAVX] in {
-// The assembler can recognize rr 256-bit instructions by seeing a ymm
-// register, but the same isn't true when using memory operands instead.
-// Provide other assembly rr and rm forms to address this explicitly.
-def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// XMM only
-def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
-
-// YMM only
-def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
-
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
-
-def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
- (VCVTTPD2DQYrr VR256:$src)>;
-def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
- (VCVTTPD2DQYrm addr:$src)>;
-
-// Convert Packed DW Integers to Packed Double FP
-let Predicates = [HasAVX] in {
-def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-}
-
-def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
-def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RM>;
-
-// AVX 256-bit register conversion intrinsics
-def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
- (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
- (VCVTDQ2PDYrm addr:$src)>;
-
-def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
- (VCVTPD2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
- (VCVTPD2DQYrm addr:$src)>;
-
-def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
- (VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (VCVTDQ2PDYrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
@@ -7339,8 +7272,8 @@ let ExeDomain = SSEPackedSingle in {
int_x86_avx2_vbroadcast_ss_ps_256>;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
+def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
@@ -7751,6 +7684,31 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VPBROADCASTQYrm addr:$src)>;
+ def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBrr VR128:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBYrr VR128:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWrr VR128:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWYrr VR128:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDrr VR128:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDYrr VR128:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQYrr VR128:$src)>;
+ def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSrr VR128:$src)>;
+ def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSYrr VR128:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VBROADCASTSDYrr VR128:$src)>;
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
@@ -7761,7 +7719,7 @@ let Predicates = [HasAVX2] in {
(VBROADCASTSSYrr
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDrr
+ (VBROADCASTSDYrr
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
@@ -7771,7 +7729,7 @@ let Predicates = [HasAVX2] in {
(VBROADCASTSSYrr
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDrr
+ (VBROADCASTSDYrr
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
}
}
@@ -8061,3 +8019,55 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+
+//===----------------------------------------------------------------------===//
+// VGATHER - GATHER Operations
+multiclass avx2_gather<bits<8> opc, string OpcodeStr,
+ RegisterClass RC256, X86MemOperand memop256,
+ Intrinsic IntGather128, Intrinsic IntGather256> {
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, v128mem:$src2, VR128:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
+ []>, VEX_4VOp3;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst),
+ (ins RC256:$src1, memop256:$src2, RC256:$mask),
+ !strconcat(OpcodeStr,
+ "\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
+ []>, VEX_4VOp3, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+ VR256, v128mem,
+ int_x86_avx2_gather_d_pd,
+ int_x86_avx2_gather_d_pd_256>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+ VR256, v256mem,
+ int_x86_avx2_gather_q_pd,
+ int_x86_avx2_gather_q_pd_256>, VEX_W;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+ VR256, v256mem,
+ int_x86_avx2_gather_d_ps,
+ int_x86_avx2_gather_d_ps_256>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+ VR128, v256mem,
+ int_x86_avx2_gather_q_ps,
+ int_x86_avx2_gather_q_ps_256>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
+ VR256, v128mem,
+ int_x86_avx2_gather_d_q,
+ int_x86_avx2_gather_d_q_256>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
+ VR256, v256mem,
+ int_x86_avx2_gather_q_q,
+ int_x86_avx2_gather_q_q_256>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
+ VR256, v256mem,
+ int_x86_avx2_gather_d_d,
+ int_x86_avx2_gather_d_d_256>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
+ VR128, v256mem,
+ int_x86_avx2_gather_q_d,
+ int_x86_avx2_gather_q_d_256>;
+}
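
On the Constraints = "$src1 = $dst" line above: gathers merge into their destination, so lanes whose mask bit is clear must keep their prior contents, which forces the output to be tied to $src1. A scalar model of one variant's semantics, assuming vpgatherdd with 32-bit elements (real hardware also clears mask lanes as elements complete, which this sketch omits):

#include <cstdint>

// Model of VPGATHERDD xmm1, [base + xmm2*scale], xmm3: a lane is loaded
// through its per-lane index only when the mask lane's sign bit is set;
// otherwise the tied destination lane passes through unchanged.
void gather_dd(uint32_t dst[4], const uint8_t *base,
               const int32_t idx[4], const int32_t mask[4], int scale) {
  for (int i = 0; i < 4; ++i)
    if (mask[i] < 0)
      dst[i] = *reinterpret_cast<const uint32_t *>(
                   base + static_cast<int64_t>(idx[i]) * scale);
}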
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 6a8f0c8486..6d3548f093 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -17,17 +17,17 @@
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1be4c3864a..ed086dd8ad 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -50,10 +50,6 @@ ForceStackAlign("force-align-stack",
" needed for the function."),
cl::init(false), cl::Hidden);
-cl::opt<bool>
-EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
- cl::desc("Enable use of a base pointer for complex stack frames"));
-
// @LOCALMOD-BEGIN
extern cl::opt<bool> FlagUseZeroBasedSandbox;
extern cl::opt<bool> FlagRestrictR15;
@@ -77,12 +73,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
SlotSize = 8;
StackPtr = X86::RSP;
FramePtr = X86::RBP;
- BasePtr = X86::RBX;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
FramePtr = X86::EBP;
- BasePtr = X86::EBX;
}
}
@@ -301,20 +295,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*I);
}
- // Set the base-pointer register and its aliases as reserved if needed.
- if (hasBasePointer(MF)) {
- CallingConv::ID CC = MF.getFunction()->getCallingConv();
- const uint32_t* RegMask = getCallPreservedMask(CC);
- if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
- report_fatal_error(
- "Stack realignment in presence of dynamic allocas is not supported with"
- "this calling convention.");
-
- Reserved.set(getBaseRegister());
- for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
- Reserved.set(*I);
- }
-
// Mark the segment registers as reserved.
Reserved.set(X86::CS);
Reserved.set(X86::SS);
@@ -384,35 +364,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- if (!EnableBasePointer)
- return false;
-
- // When we need stack realignment and there are dynamic allocas, we can't
- // reference off of the stack pointer, so we reserve a base pointer.
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
- return true;
-
- return false;
-}
-
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- if (!MF.getTarget().Options.RealignStack)
- return false;
-
- // Stack realignment requires a frame pointer. If we already started
- // register allocation with frame pointer elimination, it is too late now.
- if (!MRI->canReserveReg(FramePtr))
- return false;
-
- // If base pointer is necessary. Check that it isn't too late to reserve it.
- if (MFI->hasVarSizedObjects())
- return MRI->canReserveReg(BasePtr);
- return true;
+ return (MF.getTarget().Options.RealignStack &&
+ !MFI->hasVarSizedObjects());
}
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
@@ -422,6 +377,13 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas.
+ // FIXME: It's more complicated than this...
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ report_fatal_error(
+ "Stack realignment in presence of dynamic allocas is not supported");
+
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
return canRealignStack(MF);
@@ -561,9 +523,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Opc = MI.getOpcode();
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
- if (hasBasePointer(MF))
- BasePtr = getBaseRegister();
- else if (needsStackRealignment(MF))
+ if (needsStackRealignment(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
else if (AfterFPPop)
BasePtr = StackPtr;
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 1bc32cbb78..ee69842b10 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -50,11 +50,6 @@ private:
///
unsigned FramePtr;
- /// BasePtr - X86 physical register used as a base ptr in complex stack
- /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
- /// variable size stack objects.
- unsigned BasePtr;
-
public:
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
@@ -111,8 +106,6 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasBasePointer(const MachineFunction &MF) const;
-
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
@@ -130,7 +123,6 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getStackRegister() const { return StackPtr; }
- unsigned getBaseRegister() const { return BasePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
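
For the record, the situation the removed base-pointer support addressed (and that canRealignStack now simply rejects): once a function both over-aligns a local and allocates a variable-sized object, SP moves at runtime and FP is rounded down for realignment, so neither register gives a stable offset to fixed locals. A hedged illustration of such a frame, with arbitrary names:

#include <alloca.h>
#include <cstddef>

// Needs realignment (alignas 64 exceeds the default stack alignment) and
// has a dynamic area: addressing `buf` reliably would require a third,
// fixed base register beyond SP and FP.
int sum_first(int n) {
  alignas(64) int buf[16] = {1, 2, 3};
  int *dyn = static_cast<int *>(alloca(n * sizeof(int)));
  dyn[0] = buf[0] + buf[1];
  return dyn[0] + buf[2];
}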
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 6d05a91a32..20acc2bab3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -150,44 +150,44 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
- PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
+ addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For ELF, cleanup any local-dynamic TLS accesses.
if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
- PM->add(createCleanupLocalDynamicTLSPass());
+ addPass(createCleanupLocalDynamicTLSPass());
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
if (!getX86Subtarget().is64Bit())
- PM->add(createGlobalBaseRegPass());
+ addPass(createGlobalBaseRegPass());
return false;
}
bool X86PassConfig::addPreRegAlloc() {
- PM->add(createX86MaxStackAlignmentHeuristicPass());
+ addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
bool X86PassConfig::addPostRegAlloc() {
- PM->add(createX86FloatingPointStackifierPass());
+ addPass(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
bool X86PassConfig::addPreEmitPass() {
bool ShouldPrint = false;
if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
- PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
- PM->add(createX86IssueVZeroUpperPass());
+ addPass(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
// @LOCALMOD-START
if (getX86Subtarget().isTargetNaCl()) {
- PM->add(createX86NaClRewritePass());
+ addPass(createX86NaClRewritePass());
ShouldPrint = true;
}
// @LOCALMOD-END
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 32bfba96bb..4f39d68d40 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -10,17 +10,19 @@
#include "X86TargetObjectFile.h"
#include "X86TargetMachine.h"
#include "X86Subtarget.h" // @LOCALMOD
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h" // @LOCALMOD
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
using namespace dwarf;
-const MCExpr *X8664_MachoTargetObjectFile::
+const MCExpr *X86_64MachoTargetObjectFile::
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const {
@@ -39,12 +41,18 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
-MCSymbol *X8664_MachoTargetObjectFile::
+MCSymbol *X86_64MachoTargetObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
return Mang->getSymbol(GV);
}
+void
+X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
// @LOCALMOD-START
// NOTE: this was largely lifted from
// lib/Target/ARM/ARMTargetObjectFile.cpp
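
What the new X86LinuxTargetObjectFile::Initialize changes in practice: InitializeELF(TM.Options.UseInitArray) decides whether global constructors are emitted into the modern .init_array section or the legacy .ctors section. The class of code affected is simply (plain GNU-attribute example, not from this patch):

// With -use-init-array the pointer to module_init lands in .init_array;
// otherwise in .ctors. Attribute supported by GCC and Clang.
__attribute__((constructor))
static void module_init() {
  // runs before main()
}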
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 34c1234eae..5fac48e57a 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -16,9 +16,9 @@
namespace llvm {
- /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+ /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
- class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+ class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
virtual const MCExpr *
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
@@ -32,6 +32,12 @@ namespace llvm {
MachineModuleInfo *MMI) const;
};
+ /// X86LinuxTargetObjectFile - This implementation is used for linux x86
+ /// and x86-64.
+ class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ };
+
// @LOCALMOD-BEGIN
class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF {
public:
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 0d59572a0d..ca94f03a64 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -22,5 +22,7 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
+add_dependencies(LLVMXCoreCodeGen intrinsics_gen)
+
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8906b2459e..c76866f47b 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -18,9 +18,9 @@
#include "XCoreSubtarget.h"
#include "XCoreTargetMachine.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -260,7 +260,19 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode,
raw_ostream &O) {
- printOperand(MI, OpNo, O);
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ }
+ }
+
+ printOperand(MI, OpNo, O);
return false;
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index b25a08d25c..b2f0603776 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -796,7 +796,7 @@ def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
"mkmsk $dst, $size",
- [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>;
def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
"getr $dst, $type",
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 5afd5a1aff..11ec86b0fa 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -55,7 +55,7 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool XCorePassConfig::addInstSelector() {
- PM->add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
+ addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
return false;
}