aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2012-08-21 17:32:13 -0700
committerDerek Schuff <dschuff@chromium.org>2012-08-21 17:32:13 -0700
commit66f271497ed92ebb05c66f54616e512606a2e314 (patch)
tree96d54cd64804ab7c9f2f52f680c3301aa789ce1d /lib/Target/ARM
parentb62e9abf7dd9e39c95327914ce9dfe216386824a (diff)
parentbc363931085587bac42a40653962a3e5acd1ffce (diff)
Merge up to r162331, git commit bc363931085587bac42a40653962a3e5acd1ffce
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.td8
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp28
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp318
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h12
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp12
-rw-r--r--lib/Target/ARM/ARMCallingConv.td31
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp70
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp6
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp2
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp228
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp10
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp137
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp386
-rw-r--r--lib/Target/ARM/ARMISelLowering.h15
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td103
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td218
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td12
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td172
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td39
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp6
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td8
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td22
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td29
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp13
-rw-r--r--lib/Target/ARM/ARMSubtarget.h5
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp200
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp863
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp50
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h1
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h106
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp90
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp13
-rw-r--r--lib/Target/ARM/README.txt21
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp42
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h5
36 files changed, 1911 insertions, 1376 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 7af8b9d909..06ed89395f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -204,17 +204,17 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureDSPThumb2]>;
// V7a Processors.
-def : Processor<"cortex-a8", CortexA8Itineraries,
+def : ProcessorModel<"cortex-a8", CortexA8Model,
// @LOCALMOD-BEGIN
// TODO(pdox): Resolve this mismatch.
[ProcA8, HasV7Ops, FeatureDB]>;
// FeatureNEON, FeatureDSPThumb2, FeatureHasRAS]>;
// @LOCALMOD-END
-def : Processor<"cortex-a9", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
FeatureHasRAS]>;
@@ -228,7 +228,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP2,
+ FeatureT2XtPk, FeatureVFP4,
FeatureVFPOnlySP, FeatureMClass]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 76cd0c389d..95caf529d6 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -622,10 +622,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
+ // This modifier is not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
- case 'H': // The highest-numbered register of a pair.
return true;
+ case 'H': { // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.isReg())
+ return true;
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+ RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
+
+ unsigned Reg = RC.getRegister(RegIdx);
+ O << ARMInstPrinter::getRegisterName(Reg);
+ return false;
+ }
}
}
@@ -1253,8 +1267,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(SrcReg == ARM::SP &&
"Only stack pointer as a source reg is supported");
for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
- i != NumOps; ++i)
- RegList.push_back(MI->getOperand(i).getReg());
+ i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Actually, there should never be any impdef stuff here. Skip it
+ // temporary to workaround PR11902.
+ if (MO.isImplicit())
+ continue;
+ RegList.push_back(MO.getReg());
+ }
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 08e55429ce..aab8825444 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
}
break;
case ARM::VST1q64:
+ case ARM::VST1d64TPseudo:
+ case ARM::VST1d64QPseudo:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else
llvm_unreachable("Unknown reg class!");
break;
- case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+ case 24:
+ if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
+ case 32:
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
.addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
}
break;
case ARM::VLD1q64:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD1d64QPseudo:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
}
+/// Identify instructions that can be folded into a MOVCC instruction, and
+/// return the corresponding opcode for the predicated pseudo-instruction.
+static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
+ const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return 0;
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return 0;
+ MI = MRI.getVRegDef(Reg);
+ if (!MI)
+ return 0;
+ // Check if MI has any non-dead defs or physreg uses. This also detects
+ // predicated instructions which will be reading CPSR.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return 0;
+ if (!MO.isReg())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ return 0;
+ if (MO.isDef() && !MO.isDead())
+ return 0;
+ }
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::ANDri: return ARM::ANDCCri;
+ case ARM::ANDrr: return ARM::ANDCCrr;
+ case ARM::ANDrsi: return ARM::ANDCCrsi;
+ case ARM::ANDrsr: return ARM::ANDCCrsr;
+ case ARM::t2ANDri: return ARM::t2ANDCCri;
+ case ARM::t2ANDrr: return ARM::t2ANDCCrr;
+ case ARM::t2ANDrs: return ARM::t2ANDCCrs;
+ case ARM::EORri: return ARM::EORCCri;
+ case ARM::EORrr: return ARM::EORCCrr;
+ case ARM::EORrsi: return ARM::EORCCrsi;
+ case ARM::EORrsr: return ARM::EORCCrsr;
+ case ARM::t2EORri: return ARM::t2EORCCri;
+ case ARM::t2EORrr: return ARM::t2EORCCrr;
+ case ARM::t2EORrs: return ARM::t2EORCCrs;
+ case ARM::ORRri: return ARM::ORRCCri;
+ case ARM::ORRrr: return ARM::ORRCCrr;
+ case ARM::ORRrsi: return ARM::ORRCCrsi;
+ case ARM::ORRrsr: return ARM::ORRCCrsr;
+ case ARM::t2ORRri: return ARM::t2ORRCCri;
+ case ARM::t2ORRrr: return ARM::t2ORRCCrr;
+ case ARM::t2ORRrs: return ARM::t2ORRCCrs;
+
+ // ARM ADD/SUB
+ case ARM::ADDri: return ARM::ADDCCri;
+ case ARM::ADDrr: return ARM::ADDCCrr;
+ case ARM::ADDrsi: return ARM::ADDCCrsi;
+ case ARM::ADDrsr: return ARM::ADDCCrsr;
+ case ARM::SUBri: return ARM::SUBCCri;
+ case ARM::SUBrr: return ARM::SUBCCrr;
+ case ARM::SUBrsi: return ARM::SUBCCrsi;
+ case ARM::SUBrsr: return ARM::SUBCCrsr;
+
+ // Thumb2 ADD/SUB
+ case ARM::t2ADDri: return ARM::t2ADDCCri;
+ case ARM::t2ADDri12: return ARM::t2ADDCCri12;
+ case ARM::t2ADDrr: return ARM::t2ADDCCrr;
+ case ARM::t2ADDrs: return ARM::t2ADDCCrs;
+ case ARM::t2SUBri: return ARM::t2SUBCCri;
+ case ARM::t2SUBri12: return ARM::t2SUBCCri12;
+ case ARM::t2SUBrr: return ARM::t2SUBCCrr;
+ case ARM::t2SUBrs: return ARM::t2SUBCCrs;
+ }
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ // MOVCC operands:
+ // 0: Def.
+ // 1: True use.
+ // 2: False use.
+ // 3: Condition code.
+ // 4: CPSR use.
+ TrueOp = 1;
+ FalseOp = 2;
+ Cond.push_back(MI->getOperand(3));
+ Cond.push_back(MI->getOperand(4));
+ // We can always fold a def.
+ Optimizable = true;
+ return false;
+}
+
+MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+ bool PreferFalse) const {
+ assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+ "Unknown select instruction");
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI = 0;
+ unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
+ bool Invert = !Opc;
+ if (!Opc)
+ Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
+ if (!Opc)
+ return 0;
+
+ // Create a new predicated version of DefMI.
+ // Rfalse is the first use.
+ MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(Opc), MI->getOperand(0).getReg())
+ .addOperand(MI->getOperand(Invert ? 2 : 1));
+
+ // Copy all the DefMI operands, excluding its (null) predicate.
+ const MCInstrDesc &DefDesc = DefMI->getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands();
+ i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+ NewMI.addOperand(DefMI->getOperand(i));
+
+ unsigned CondCode = MI->getOperand(3).getImm();
+ if (Invert)
+ NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+ else
+ NewMI.addImm(CondCode);
+ NewMI.addOperand(MI->getOperand(4));
+
+ // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+ if (NewMI->hasOptionalDef())
+ AddDefaultCC(NewMI);
+
+ // The caller will erase MI, but not DefMI.
+ DefMI->eraseFromParent();
+ return NewMI;
+}
+
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
/// def operand.
@@ -1975,10 +2152,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
case ARM::t2EORri: {
// Scan forward for the use of CPSR
// When checking against MI: if it's a conditional code requires
- // checking of V bit, then this is not safe to do. If we can't find the
- // CPSR use (i.e. used in another block), then it's not safe to perform
- // the optimization.
- // When checking against Sub, we handle the condition codes GE, LT, GT, LE.
+ // checking of V bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
OperandsToUpdate;
bool isSafe = false;
@@ -2018,7 +2195,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
else
switch (CC) {
default:
- isSafe = true;
+ // CPSR can be used multiple times, we should continue.
break;
case ARMCC::VS:
case ARMCC::VC:
@@ -2031,10 +2208,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
}
}
- // If the candidate is Sub, we may exit the loop at end of the basic block.
- // In that case, it is still safe to remove CmpInstr.
- if (!isSafe && !Sub)
- return false;
+ // If CPSR is not killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr->getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
+ }
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
@@ -2195,7 +2377,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
//
// On Cortex-A8, each pair of register loads / stores can be scheduled on the
// same cycle. The scheduling for the first load / store must be done
- // separately by assuming the the address is not 64-bit aligned.
+ // separately by assuming the address is not 64-bit aligned.
//
// On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
// is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
@@ -3177,11 +3359,18 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
- // predicated.
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+ // Cortex-A9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS ||
+ MI->getOpcode() == ARM::VMOVSR))
+ return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -3201,22 +3390,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
void
ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
- // We only know how to change VMOVD into VORR.
- assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
- if (Domain != ExeNEON)
- return;
+ unsigned DstReg, SrcReg, DReg;
+ unsigned Lane;
+ MachineInstrBuilder MIB(MI);
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool isKill;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("cannot handle opcode!");
+ break;
+ case ARM::VMOVD:
+ if (Domain != ExeNEON)
+ break;
- // Zap the predicate operands.
- assert(!isPredicated(MI) && "Cannot predicate a VORRd");
- MI->RemoveOperand(3);
- MI->RemoveOperand(2);
+ // Zap the predicate operands.
+ assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
- // Change to a VORRd which requires two identical use operands.
- MI->setDesc(get(ARM::VORRd));
+ // Change to a VORRd which requires two identical use operands.
+ MI->setDesc(get(ARM::VORRd));
+
+ // Add the extra source operand and new predicates.
+ // This will go before any implicit ops.
+ AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+ break;
+ case ARM::VMOVRS:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+ if (DReg == ARM::NoRegister) {
+ DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
+ Lane = 1;
+ assert(DReg && "S-register with no D super-register?");
+ }
+
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+ MI->RemoveOperand(1);
+
+ MI->setDesc(get(ARM::VGETLNi32));
+ MIB.addReg(DReg);
+ MIB.addImm(Lane);
+
+ MIB->getOperand(1).setIsUndef();
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ AddDefaultPred(MIB);
+ break;
+ case ARM::VMOVSR:
+ if (Domain != ExeNEON)
+ break;
+ assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
+ Lane = 0;
+ if (DReg == ARM::NoRegister) {
+ DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
+ Lane = 1;
+ assert(DReg && "S-register with no D super-register?");
+ }
+ isKill = MI->getOperand(0).isKill();
+
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(2);
+ MI->RemoveOperand(1);
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(ARM::VSETLNi32));
+ MIB.addReg(DReg, RegState::Define);
+ MIB.addReg(DReg, RegState::Undef);
+ MIB.addReg(SrcReg);
+ MIB.addImm(Lane);
+
+ if (isKill)
+ MIB->addRegisterKilled(DstReg, TRI, true);
+ MIB->addRegisterDefined(DstReg, TRI);
+
+ AddDefaultPred(MIB);
+ break;
+ }
- // Add the extra source operand and new predicates.
- // This will go before any implicit ops.
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
bool ARMBaseInstrInfo::hasNOP() const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a10a4ab1c..92e5ee8dcb 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -202,6 +202,13 @@ public:
unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
+ virtual bool analyzeSelect(const MachineInstr *MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const;
+
+ virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
+
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
int getMatchingCondBranchOpcode(int Opc);
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+ MachineInstr *&MI,
+ const MachineRegisterInfo &MRI);
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 533ed8834b..b2ba9703b6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -65,8 +65,20 @@ extern cl::opt<bool> ReserveR9; // @LOCALMOD
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (ReserveR9) return CSR_NaCl_SaveList; // @LOCALMOD
+ bool ghcCall = false;
+
+ if (MF) {
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+ }
+
+ if (ghcCall) {
+ return CSR_GHC_SaveList;
+ }
+ else {
return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ }
}
const uint32_t*
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 1a79d95f95..e0f3a871ba 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+ CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+ CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+ CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -117,6 +136,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -142,6 +164,9 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -176,6 +201,12 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+// GHC set of callee saved regs is empty as all those regs are
+// used for passing STG regs around
+// add is a workaround for not being able to compile empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
+
// @LOCALMOD-START
// NaCl does not save R9, but otherwise uses the same order as AAPCS
def CSR_NaCl : CalleeSavedRegs<(add LR, R11, R10, R8, R7, R6, R5, R4,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index fc445a9339..e81b4cc282 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -254,7 +254,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
uint32_t Binary;
Binary = Imm12 & 0xfff;
@@ -296,7 +296,7 @@ namespace {
emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
return 0;
}
- unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
int32_t Imm12 = MO1.getImm();
// Special value for #-0
@@ -352,6 +352,12 @@ namespace {
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
intptr_