Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--   lib/Target/ARM/ARM.td                              |   2
-rw-r--r--   lib/Target/ARM/ARMAsmPrinter.cpp                   |   6
-rw-r--r--   lib/Target/ARM/ARMBaseInstrInfo.cpp                | 225
-rw-r--r--   lib/Target/ARM/ARMBaseInstrInfo.h                  |  24
-rw-r--r--   lib/Target/ARM/ARMExpandPseudoInsts.cpp            |  11
-rw-r--r--   lib/Target/ARM/ARMFastISel.cpp                     |  25
-rw-r--r--   lib/Target/ARM/ARMISelLowering.cpp                 |  67
-rw-r--r--   lib/Target/ARM/ARMInstrInfo.cpp                    |   3
-rw-r--r--   lib/Target/ARM/ARMInstrInfo.td                     |  71
-rw-r--r--   lib/Target/ARM/ARMInstrThumb.td                    |   3
-rw-r--r--   lib/Target/ARM/ARMInstrThumb2.td                   |  69
-rw-r--r--   lib/Target/ARM/ARMInstrVFP.td                      |   8
-rw-r--r--   lib/Target/ARM/ARMSchedule.td                      |  24
-rw-r--r--   lib/Target/ARM/ARMScheduleA8.td                    |  34
-rw-r--r--   lib/Target/ARM/ARMScheduleA9.td                    |  35
-rw-r--r--   lib/Target/ARM/ARMTargetMachine.cpp                |  26
-rw-r--r--   lib/Target/ARM/ARMTargetObjectFile.cpp             |  43
-rw-r--r--   lib/Target/ARM/ARMTargetObjectFile.h               |   4
-rw-r--r--   lib/Target/ARM/AsmParser/ARMAsmParser.cpp          |  40
-rw-r--r--   lib/Target/ARM/CMakeLists.txt                      |   2
-rw-r--r--   lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp      |  51
-rw-r--r--   lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp |   8
22 files changed, 432 insertions(+), 349 deletions(-)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 10d7f56c7f..7af8b9d909 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
FeatureAvoidPartialCPSR]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, GenericItineraries, Features>;
+ : Processor<Name, NoItineraries, Features>;
// V4 Processors.
def : ProcNoItin<"generic", []>;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 967c0a8462..76cd0c389d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,8 +23,8 @@
#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
@@ -515,7 +515,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
case 'a': // Print as a memory address.
if (MI->getOperand(OpNum).isReg()) {
O << "["
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 0811d226b4..08e55429ce 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1738,26 +1738,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return Offset == 0;
}
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2 if having two register operands, and the value it
+/// compares against in CmpValue. Return true if the comparison instruction
+/// can be analyzed.
bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
- int &CmpValue) const {
+analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
+ int &CmpMask, int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = MI->getOperand(1).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
case ARM::TSTri:
case ARM::t2TSTri:
SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
CmpMask = MI->getOperand(1).getImm();
CmpValue = 0;
return true;
@@ -1795,21 +1802,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
return false;
}
-/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register. Convert
-/// the SUBrr(r1,r2)|Subri(r1,CmpValue) instruction into one that sets the flags
-/// register and remove the CMPrr(r1,r2)|CMPrr(r2,r1)|CMPri(r1,CmpValue)
-/// instruction.
-bool ARMBaseInstrInfo::
-OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: return ARMCC::AL;
+ case ARMCC::EQ: return ARMCC::EQ;
+ case ARMCC::NE: return ARMCC::NE;
+ case ARMCC::HS: return ARMCC::LS;
+ case ARMCC::LO: return ARMCC::HI;
+ case ARMCC::HI: return ARMCC::LO;
+ case ARMCC::LS: return ARMCC::HS;
+ case ARMCC::GE: return ARMCC::LE;
+ case ARMCC::LT: return ARMCC::GT;
+ case ARMCC::GT: return ARMCC::LT;
+ case ARMCC::LE: return ARMCC::GE;
+ }
+}
+
+/// isRedundantFlagInstr - check whether the first instruction, whose only
+/// purpose is to update flags, can be made redundant.
+/// CMPrr can be made redundant by SUBrr if the operands are the same.
+/// CMPri can be made redundant by SUBri if the operands are the same.
+/// This function can be extended later on.
+inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
+ unsigned SrcReg2, int ImmValue,
+ MachineInstr *OI) {
+ if ((CmpI->getOpcode() == ARM::CMPrr ||
+ CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr ||
+ OI->getOpcode() == ARM::t2SUBrr) &&
+ ((OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) ||
+ (OI->getOperand(1).getReg() == SrcReg2 &&
+ OI->getOperand(2).getReg() == SrcReg)))
+ return true;
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
- return false;
+ if ((CmpI->getOpcode() == ARM::CMPri ||
+ CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri ||
+ OI->getOpcode() == ARM::t2SUBri) &&
+ OI->getOperand(1).getReg() == SrcReg &&
+ OI->getOperand(2).getImm() == ImmValue)
+ return true;
+ return false;
+}
- MachineInstr *MI = &*DI;
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register;
+/// Remove a redundant Compare instruction if an earlier instruction can set the
+/// flags in the same way as Compare.
+/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
+/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
+/// condition code of instructions which use the flags.
+bool ARMBaseInstrInfo::
+optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
@@ -1840,13 +1893,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
MachineInstr *Sub = NULL;
- unsigned SrcReg2 = 0;
- if (CmpInstr->getOpcode() == ARM::CMPrr ||
- CmpInstr->getOpcode() == ARM::t2CMPrr) {
- SrcReg2 = CmpInstr->getOperand(1).getReg();
+ if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
MI = NULL;
- } else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
+ else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
@@ -1859,40 +1909,19 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
- return false;
- if (!MO.isReg()) continue;
-
+ if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
+ Instr.readsRegister(ARM::CPSR, TRI))
// This instruction modifies or uses CPSR after the one we want to
// change. We can't do this transformation.
- if (MO.getReg() == ARM::CPSR)
- return false;
- }
-
- // Check whether the current instruction is SUB(r1, r2) or SUB(r2, r1).
- if (SrcReg2 != 0 &&
- (Instr.getOpcode() == ARM::SUBrr ||
- Instr.getOpcode() == ARM::t2SUBrr) &&
- ((Instr.getOperand(1).getReg() == SrcReg &&
- Instr.getOperand(2).getReg() == SrcReg2) ||
- (Instr.getOperand(1).getReg() == SrcReg2 &&
- Instr.getOperand(2).getReg() == SrcReg))) {
- Sub = &*I;
- break;
- }
+ return false;
- // Check whether the current instruction is SUBri(r1, CmpValue).
- if ((CmpInstr->getOpcode() == ARM::CMPri ||
- CmpInstr->getOpcode() == ARM::t2CMPri) &&
- Instr.getOpcode() == ARM::SUBri && CmpValue != 0 &&
- Instr.getOperand(1).getReg() == SrcReg &&
- Instr.getOperand(2).getImm() == CmpValue) {
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
Sub = &*I;
break;
}
@@ -1950,7 +1979,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// CPSR use (i.e. used in another block), then it's not safe to perform
// the optimization.
// When checking against Sub, we handle the condition codes GE, LT, GT, LE.
- SmallVector<MachineOperand*, 4> OperandsToUpdate;
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
bool isSafe = false;
I = CmpInstr;
E = CmpInstr->getParent()->end();
@@ -1971,30 +2001,20 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
}
// Condition code is after the operand before CPSR.
ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
- if (Sub)
- switch (CC) {
- default:
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
return false;
- case ARMCC::GE:
- case ARMCC::LT:
- case ARMCC::GT:
- case ARMCC::LE:
- case ARMCC::HS:
- case ARMCC::LS:
- case ARMCC::HI:
- case ARMCC::LO:
- case ARMCC::EQ:
- case ARMCC::NE:
- // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
- // on CMP needs to be updated to be based on SUB.
- // Push the condition code operands to OperandsToUpdate.
- // If it is safe to remove CmpInstr, the condition code of these
- // operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg)
- OperandsToUpdate.push_back(&((*I).getOperand(IO-1)));
- break;
- }
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg)
+ OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
+ NewCC));
+ }
else
switch (CC) {
default:
@@ -2024,26 +2044,9 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Modify the condition code of operands in OperandsToUpdate.
// Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
// be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
- for (unsigned i = 0; i < OperandsToUpdate.size(); i++) {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)OperandsToUpdate[i]->getImm();
- ARMCC::CondCodes NewCC;
- switch (CC) {
- default: llvm_unreachable("only expecting less/greater comparisons here");
- case ARMCC::GE: NewCC = ARMCC::LE; break;
- case ARMCC::LT: NewCC = ARMCC::GT; break;
- case ARMCC::GT: NewCC = ARMCC::LT; break;
- case ARMCC::LE: NewCC = ARMCC::GE; break;
- case ARMCC::HS: NewCC = ARMCC::LS; break;
- case ARMCC::LS: NewCC = ARMCC::HS; break;
- case ARMCC::HI: NewCC = ARMCC::LO; break;
- case ARMCC::LO: NewCC = ARMCC::HI; break;
- case ARMCC::EQ:
- case ARMCC::NE:
- NewCC = CC;
- break;
- }
- OperandsToUpdate[i]->setImm(NewCC);
- }
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+
return true;
}
}
@@ -2175,9 +2178,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
- unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (UOps)
- return UOps;
+ int ItinUOps = ItinData->getNumMicroOps(Class);
+ if (ItinUOps >= 0)
+ return ItinUOps;
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2251,19 +2254,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
return 2;
// 4 registers would be issued: 2, 2.
// 5 registers would be issued: 2, 2, 1.
- UOps = (NumRegs / 2);
+ int A8UOps = (NumRegs / 2);
if (NumRegs % 2)
- ++UOps;
- return UOps;
+ ++A8UOps;
+ return A8UOps;
} else if (Subtarget.isCortexA9()) {
- UOps = (NumRegs / 2);
+ int A9UOps = (NumRegs / 2);
// If there are odd number of registers or if it's not 64-bit aligned,
// then it takes an extra AGU (Address Generation Unit) cycle.
if ((NumRegs % 2) ||
!MI->hasOneMemOperand() ||
(*MI->memoperands_begin())->getAlignment() < 8)
- ++UOps;
- return UOps;
+ ++A9UOps;
+ return A9UOps;
} else {
// Assume the worst.
return NumRegs;
@@ -2763,11 +2766,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
unsigned NewUseIdx;
const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
Reg, NewUseIdx, UseAdj);
- if (NewUseMI) {
- UseMI = NewUseMI;
- UseIdx = NewUseIdx;
- UseMCID = &UseMI->getDesc();
- }
+ if (!NewUseMI)
+ return -1;
+
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
}
if (Reg == ARM::CPSR) {
@@ -2795,6 +2799,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+ if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
+ return -1;
+
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
@@ -3015,9 +3022,7 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
return 1;
// If the second MI is predicated, then there is an implicit use dependency.
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
- DepMI->getNumOperands());
- return (Latency <= 0) ? 1 : Latency;
+ return getInstrLatency(ItinData, DefMI);
}
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
@@ -3054,9 +3059,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
unsigned Class = MCID.getSchedClass();
// For instructions with variable uops, use uops as latency.
- if (!ItinData->isEmpty() && !ItinData->Itineraries[Class].NumMicroOps) {
+ if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
return getNumMicroOps(ItinData, MI);
- }
+
// For the common case, fall back on the itinerary's latency.
unsigned Latency = ItinData->getStageLatency(Class);
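
Editorial note: the core of the optimizeCompareInstr rework above is that CMP(a,b) is redundant when an earlier SUB already sets the flags for the same operands, possibly in swapped order; in the swapped case every consumer of the flags must move to the mirrored condition code. A minimal standalone sketch of that mapping (plain C++, not the LLVM API), assuming AL serves as the "cannot swap" sentinel exactly as in getSwappedCondition:

```cpp
#include <cassert>

// Mirrors the ARM condition codes used above: equality tests are symmetric,
// ordered comparisons flip direction, and AL doubles as "no valid swap".
enum CondCode { EQ, NE, HS, LO, HI, LS, GE, LT, GT, LE, AL };

static CondCode swapCondition(CondCode CC) {
  switch (CC) {
  case EQ: return EQ;
  case NE: return NE;
  case HS: return LS;  // unsigned >=  ->  unsigned <=
  case LO: return HI;
  case HI: return LO;
  case LS: return HS;
  case GE: return LE;  // signed >=    ->  signed <=
  case LT: return GT;
  case GT: return LT;
  case LE: return GE;
  default: return AL;
  }
}

int main() {
  // SUBS rX, r0, r1 followed by a branch can replace CMP r1, r0 + BGT
  // only if the branch condition is rewritten from GT to LT.
  assert(swapCondition(GT) == LT);
  assert(swapCondition(EQ) == EQ);
  return 0;
}
```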
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 8217f239d1..1a10a4ab1c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -186,16 +186,20 @@ public:
return NumCycles == 1;
}
- /// AnalyzeCompare - For a comparison instruction, return the source register
- /// in SrcReg and the value it compares against in CmpValue. Return true if
- /// the comparison instruction can be analyzed.
- virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- int &CmpMask, int &CmpValue) const;
-
- /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
- /// that we can remove a "comparison with zero".
- virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- int CmpMask, int CmpValue,
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const;
+
+ /// optimizeCompareInstr - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero"; Remove a redundant CMP
+ /// instruction if the flags can be updated in the same way by an earlier
+ /// instruction such as SUB.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
/// FoldImmediate - 'Reg' is known to be defined by a move immediate
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c386a01e89..3650e1fb77 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -471,22 +471,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0);
+ MIB.addReg(D0, getUndefRegState(SrcIsUndef));
if (NumRegs > 1 && TableEntry->copyAllListRegs)
- MIB.addReg(D1);
+ MIB.addReg(D1, getUndefRegState(SrcIsUndef));
if (NumRegs > 2 && TableEntry->copyAllListRegs)
- MIB.addReg(D2);
+ MIB.addReg(D2, getUndefRegState(SrcIsUndef));
if (NumRegs > 3 && TableEntry->copyAllListRegs)
- MIB.addReg(D3);
+ MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
- if (SrcIsKill) // Add an implicit kill for the super-reg.
+ if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
// Transfer memoperands.
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 2158b7e028..ff660210ea 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -177,7 +177,6 @@ class ARMFastISel : public FastISel {
bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
-
bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
@@ -1361,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
.addReg(AddrReg));
- return true;
+ return true;
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -1740,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
// type and the target independent selector doesn't know how to handle it.
if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
-
+
unsigned Opc;
switch (ISDOpcode) {
default: return false;
@@ -2146,7 +2145,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return false;
// Can't handle non-double multi-reg retvals.
- if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
+ if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
@@ -2352,7 +2351,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MIB.addReg(CalleeReg);
else if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
- else
+ else
MIB.addExternalSymbol(IntrMemName, 0);
} else {
if (UseReg)
@@ -2365,7 +2364,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Explicitly adding the predicate here.
AddDefaultPred(MIB);
}
-
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
@@ -2486,10 +2485,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return true;
}
}
-
+
if (!MTI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
return false;
@@ -2501,13 +2500,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// Don't handle volatile.
if (MSI.isVolatile())
return false;
-
+
if (!MSI.getLength()->getType()->isIntegerTy(32))
return false;
-
+
if (MSI.getDestAddressSpace() > 255)
return false;
-
+
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
@@ -2518,7 +2517,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
}
bool ARMFastISel::SelectTrunc(const Instruction *I) {
- // The high bits for a type smaller than the register size are assumed to be
+ // The high bits for a type smaller than the register size are assumed to be
// undefined.
Value *Op = I->getOperand(0);
@@ -2709,7 +2708,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
// See if we can handle this address.
Address Addr;
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
-
+
unsigned ResultReg = MI->getOperand(0).getReg();
if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e1c89e0c42..238b79e1f1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -6561,11 +6561,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize;
+ unsigned ldrOpc, strOpc, UnitSize = 0;
const TargetRegisterClass *TRC = isThumb2 ?
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass;
+ const TargetRegisterClass *TRC_Vec = 0;
if (Align & 1) {
ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
@@ -6576,10 +6577,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
- UnitSize = 4;
+ // Check whether we can use NEON instructions.
+ if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if ((Align % 16 == 0) && SizeVal >= 16) {
+ ldrOpc = ARM::VLD1q32wb_fixed;
+ strOpc = ARM::VST1q32wb_fixed;
+ UnitSize = 16;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
+ }
+ else if ((Align % 8 == 0) && SizeVal >= 8) {
+ ldrOpc = ARM::VLD1d32wb_fixed;
+ strOpc = ARM::VST1d32wb_fixed;
+ UnitSize = 8;
+ TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
+ }
+ }
+ // Can't use NEON instructions.
+ if (UnitSize == 0) {
+ ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
+ strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ UnitSize = 4;
+ }
}
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -6590,10 +6611,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, MI, dl,
+ TII->get(ldrOpc), scratch)
+ .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
+
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
+ .addReg(destIn).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
AddDefaultPred(BuildMI(*BB, MI, dl,
TII->get(ldrOpc), scratch)
.addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
@@ -6739,8 +6767,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
- unsigned scratch = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
+ unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
+ if (UnitSize >= 8) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
+ .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
+ .addReg(destPhi).addImm(0).addReg(scratch));
+ } else if (isThumb2) {
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
.addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
@@ -7113,9 +7147,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &MRI = Fn->getRegInfo();
// In Thumb mode S must not be specified if source register is the SP or
// PC and if destination register is the SP, so restrict register class
- unsigned NewMovDstReg = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
(const TargetRegisterClass*)&ARM::rGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass);
@@ -7132,12 +7163,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// fall through to SinkMBB
RSBBB->addSuccessor(SinkBB);
- // insert a movs at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
- NewMovDstReg)
- .addReg(ABSSrcReg, RegState::Kill)
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::CPSR, RegState::Define);
+ // insert a cmp at the end of BB
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg).addImm(0));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -7149,7 +7178,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(NewMovDstReg, RegState::Kill)
+ .addReg(ABSSrcReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
// insert PHI in SinkBB,
@@ -7157,7 +7186,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
.addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(NewMovDstReg).addMBB(BB);
+ .addReg(ABSSrcReg).addMBB(BB);
// remove ABS instruction
MI->eraseFromParent();
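
Editorial note: the EmitStructByval hunks above pick a copy unit from alignment and size — 16-byte NEON q-register chunks when 16-byte aligned, 8-byte d-register chunks when 8-byte aligned (both only if NEON is available and implicit FP use is allowed), and the existing 4-byte LDR/STR loop otherwise — with the remainder copied as a byte tail. A rough C++ model of that selection, purely illustrative, with memcpy standing in for the post-incrementing load/store pair:

```cpp
#include <cstddef>
#include <cstring>

// Illustrative model only: chooses the same unit sizes as the lowering code,
// copies LoopSize bytes in full units, then BytesLeft bytes as a tail.
static void byvalCopyModel(char *Dst, const char *Src,
                           std::size_t SizeVal, std::size_t Align) {
  std::size_t UnitSize = 4;                       // LDR/STR fallback
  if (Align % 16 == 0 && SizeVal >= 16)
    UnitSize = 16;                                // VLD1.32/VST1.32 {q}
  else if (Align % 8 == 0 && SizeVal >= 8)
    UnitSize = 8;                                 // VLD1.32/VST1.32 {d}

  std::size_t BytesLeft = SizeVal % UnitSize;
  std::size_t LoopSize  = SizeVal - BytesLeft;
  for (std::size_t i = 0; i < LoopSize; i += UnitSize)
    std::memcpy(Dst + i, Src + i, UnitSize);      // one post-inc load/store pair
  for (std::size_t i = LoopSize; i < SizeVal; ++i)
    Dst[i] = Src[i];                              // byte tail (BytesLeft)
}
```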
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index b8f607eb4c..31b0c41f08 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
if (hasNOP()) {
- NopInst.setOpcode(ARM::NOP);
+ NopInst.setOpcode(ARM::HINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
} else {
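
Editorial note: the getNoopForMachoTarget change reflects that the architected NOP is simply the first entry in the HINT space, so a single HINT opcode with an immediate operand can model it. A tiny illustrative sketch (names are not the LLVM API) of the ARMv7 hint immediates this relies on:

```cpp
// Illustrative only: HINT #0 encodes NOP, with YIELD/WFE/WFI/SEV at #1..#4,
// which is why the MCInst above is built as HINT with immediate 0.
enum class ARMHintImm : unsigned { NOP = 0, YIELD = 1, WFE = 2, WFI = 3, SEV = 4 };
```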
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 611d9194fd..6a14871bb0 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -244,7 +244,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
// But only select them if more precision in FP computation is allowed.
// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast) && "
"!Subtarget->isTargetDarwin()">;
def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || "