aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- include/llvm/Target/Target.td | 1
-rw-r--r-- include/llvm/Target/TargetInstrDesc.h | 11
-rw-r--r-- include/llvm/Target/TargetInstrInfo.h | 7
-rw-r--r-- lib/CodeGen/LLVMTargetMachine.cpp | 4
-rw-r--r-- lib/CodeGen/PeepholeOptimizer.cpp | 62
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 97
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.h | 5
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 42
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td | 4
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td | 29
-rw-r--r-- test/CodeGen/ARM/arm-and-tst-peephole.ll | 5
-rw-r--r-- test/CodeGen/ARM/select_xform.ll | 3
-rw-r--r-- test/CodeGen/Thumb2/machine-licm.ll | 41
-rw-r--r-- test/CodeGen/Thumb2/thumb2-mov.ll | 6
-rw-r--r-- test/CodeGen/Thumb2/thumb2-select_xform.ll | 4
-rw-r--r-- utils/TableGen/CodeGenInstruction.cpp | 1
-rw-r--r-- utils/TableGen/CodeGenInstruction.h | 1
-rw-r--r-- utils/TableGen/InstrInfoEmitter.cpp | 1
18 files changed, 269 insertions, 55 deletions
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index bd629f16e2..f7a029bd61 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -199,6 +199,7 @@ class Instruction {
bit isBranch = 0; // Is this instruction a branch instruction?
bit isIndirectBranch = 0; // Is this instruction an indirect branch?
bit isCompare = 0; // Is this instruction a comparison instruction?
+ bit isMoveImm = 0; // Is this instruction a move immediate instruction?
bit isBarrier = 0; // Can control flow fall through this instruction?
bit isCall = 0; // Is this instruction a call instruction?
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index a127aed8f6..11b7a5722b 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -103,13 +103,14 @@ namespace TID {
Terminator,
Branch,
IndirectBranch,
- Predicable,
- NotDuplicable,
Compare,
+ MoveImm,
DelaySlot,
FoldableAsLoad,
MayLoad,
MayStore,
+ Predicable,
+ NotDuplicable,
UnmodeledSideEffects,
Commutable,
ConvertibleTo3Addr,
@@ -352,6 +353,12 @@ public:
return Flags & (1 << TID::Compare);
}
+ /// isMoveImmediate - Return true if this instruction is a move immediate
+ /// (including conditional moves) instruction, i.e. its MoveImm flag is set.
+ bool isMoveImmediate() const {
+ return Flags & (1 << TID::MoveImm); // Non-zero iff the TID::MoveImm bit is set.
+ }
+
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 2bb01f483a..09400b3b83 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -608,6 +608,13 @@ public:
const MachineRegisterInfo *MRI) const {
return false;
}
+
+ /// FoldImmediate - 'Reg' is known to be defined by the move immediate DefMI;
+ /// try to fold the immediate into UseMI. Return true if the fold was done.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ return false; // Default implementation never folds; targets may override.
+ }
/// getNumMicroOps - Return the number of u-operations the given machine
/// instruction will be decoded to on the target cpu.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index d19b319884..e6e912199f 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -365,13 +365,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createDeadMachineInstructionElimPass());
printAndVerify(PM, "After codegen DCE pass");
- PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
}
// Pre-ra tail duplication.
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 75f453ad71..07c2b80dcb 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -41,7 +41,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -56,6 +58,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediate folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -85,6 +88,12 @@ namespace {
bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
};
}
@@ -257,6 +266,49 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
return false;
}
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { // Records MI if it is a move immediate defining a vreg.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isMoveImmediate()) // Only instructions flagged as move immediate qualify.
+ return false;
+ if (TID.getNumDefs() != 1) // Exactly one def is required; it is read from operand 0 below.
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI)); // Map the register to its defining instruction.
+ ImmDefRegs.insert(Reg); // Kept in sync with ImmDefMIs for quick membership tests.
+ return true;
+ }
+
+ return false; // A non-virtual destination register is not recorded.
+}
+
+/// FoldImmediate - Try folding register operands of MI that are defined by
+/// move immediates recorded in ImmDefRegs/ImmDefMIs (which are per-BB, so def
+/// and use are in the same BB). Returns true after the first successful fold.
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) // Only register uses are candidates.
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) // Skip null and physical registers.
+ continue;
+ if (ImmDefRegs.count(Reg) == 0) // Reg has no recorded move-immediate def.
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end()); // isMoveImmediate() inserts into both containers together.
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { // Target hook performs the actual rewrite.
+ ++NumImmFold;
+ return true; // At most one operand is folded per call.
+ }
+ }
+ return false;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
@@ -269,9 +321,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
for (MachineBasicBlock::iterator
MII = I->begin(), MIE = I->end(); MII != MIE; ) {
@@ -283,8 +341,12 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->getDesc().isCompare()) {
Changed |= OptimizeCmpInstr(MI, MBB);
+ } else if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
} else {
Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ if (SeenMoveImm)
+ Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
}
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c743628df4..f61dfb7629 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1556,6 +1556,103 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
return false;
}
+/// FoldImmediate - Fold the 32-bit immediate that DefMI moves into Reg into
+/// UseMI (add/sub/or/xor, ARM or Thumb2) as two reg-imm operations, then
+/// erase DefMI. Returns true on success; nothing is modified on failure.
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+                                     MachineInstr *DefMI, unsigned Reg,
+                                     MachineRegisterInfo *MRI) const {
+  unsigned DefOpc = DefMI->getOpcode();
+  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+    return false;
+  if (!DefMI->getOperand(1).isImm())
+    // Could be t2MOVi32imm <ga:xx>
+    return false;
+
+  if (!MRI->hasOneNonDBGUse(Reg)) // DefMI is erased below, so UseMI must be the only user.
+    return false;
+
+  unsigned UseOpc = UseMI->getOpcode();
+  unsigned NewUseOpc;
+  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+  uint32_t SOImmValV1, SOImmValV2;
+  bool Commute = false;
+  switch (UseOpc) {
+  default: return false;
+  case ARM::SUBrr:
+  case ARM::ADDrr:
+  case ARM::ORRrr:
+  case ARM::EORrr:
+  case ARM::t2SUBrr:
+  case ARM::t2ADDrr:
+  case ARM::t2ORRrr:
+  case ARM::t2EORrr: {
+    Commute = UseMI->getOperand(2).getReg() != Reg; // True when Reg is not operand 2.
+    switch (UseOpc) {
+    default: break;
+    case ARM::SUBrr: {
+      if (Commute) // Imm - Rn cannot be expressed with SUBri.
+        return false;
+      NewUseOpc = ARM::SUBri; // Rn - Imm == (Rn - Imm1) - Imm2: ImmVal is not negated.
+      // Fallthrough
+    }
+    case ARM::ADDrr:
+    case ARM::ORRrr:
+    case ARM::EORrr: {
+      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+        return false;
+      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+      switch (UseOpc) {
+      default: break; // SUBrr: NewUseOpc was already set above.
+      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+      }
+      break;
+    }
+    case ARM::t2SUBrr: {
+      if (Commute) // Imm - Rn cannot be expressed with t2SUBri.
+        return false;
+      NewUseOpc = ARM::t2SUBri; // Same as SUBrr: fold the unnegated immediate.
+      // Fallthrough
+    }
+    case ARM::t2ADDrr:
+    case ARM::t2ORRrr:
+    case ARM::t2EORrr: {
+      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+        return false;
+      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+      switch (UseOpc) {
+      default: break; // t2SUBrr: NewUseOpc was already set above.
+      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+      }
+      break;
+    }
+    }
+  }
+  }
+
+  unsigned OpIdx = Commute ? 2 : 1; // Index of the source operand that is not Reg.
+  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+  bool isKill = UseMI->getOperand(OpIdx).isKill();
+  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), // NewReg = Reg1 <op> Imm1
+                                      *UseMI, UseMI->getDebugLoc(),
+                                      get(NewUseOpc), NewReg)
+                              .addReg(Reg1, getKillRegState(isKill))
+                              .addImm(SOImmValV1)));
+  UseMI->setDesc(get(NewUseOpc)); // UseMI becomes: Rd = NewReg <op> Imm2
+  UseMI->getOperand(1).setReg(NewReg);
+  UseMI->getOperand(1).setIsKill();
+  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+  DefMI->eraseFromParent();
+  return true;
+}
+
unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index c929fce9e7..cbcc428b71 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -346,6 +346,11 @@ public:
int CmpMask, int CmpValue,
const MachineRegisterInfo *MRI) const;
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index b2a0a614e9..3696fef118 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -269,6 +269,16 @@ def sube_live_carry :
PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
[{return N->hasAnyUseOfValue(1);}]>;
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -1941,7 +1951,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
let Inst{25} = 0;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
"mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
bits<4> Rd;
@@ -1952,7 +1962,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
let Inst{11-0} = imm;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm:$imm),
DPFrm, IIC_iMOVi,
"movw", "\t$Rd, $imm",
@@ -2510,7 +2520,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
let Inst{15-12} = Rd;
let Inst{11-0} = shift;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
IIC_iMVNi, "mvn", "\t$Rd, $imm",
[(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
@@ -2972,10 +2982,10 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp",
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
- BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
defm TEQ : AI1_cmp_irs<0b1001, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
- BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
defm CMPz : AI1_cmp_irs<0b1010, "cmp",
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
@@ -3038,6 +3048,7 @@ def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
let Inst{11-0} = shift;
}
+let isMoveImm = 1 in
def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm:$imm),
DPFrm, IIC_iMOVi,
"movw", "\t$Rd, $imm",
@@ -3053,6 +3064,7 @@ def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm:$imm),
let Inst{11-0} = imm{11-0};
}
+let isMoveImm = 1 in
def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
(ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
"mov", "\t$Rd, $imm",
@@ -3068,10 +3080,12 @@ def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
}
// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
(ins GPR:$false, i32imm:$src, pred:$p),
IIC_iCMOVix2, "", []>, RegConstraint<"$false = $Rd">;
+let isMoveImm = 1 in
def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
(ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
"mvn", "\t$Rd, $imm",
@@ -3337,27 +3351,11 @@ def Int_eh_sjlj_dispatchsetup :
// Large immediate handling.
-// FIXME: Folding immediates into these logical operations aren't necessary
-// good ideas. If it's in a loop machine licm could have hoisted the immediate
-// computation out of the loop.
-def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
- (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
- (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS),
- (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
- (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)),
- (so_neg_imm2part_2 imm:$RHS))>;
-
// 32-bit immediate using two piece so_imms or movw + movt.
// This is a single pseudo instruction, the benefit is that it can be remat'd
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1 in
+let isReMaterializable = 1, isMoveImm = 1 in
def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, "",
[(set GPR:$dst, (arm_i32imm:$src))]>,
Requires<[IsARM]>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index d7a22a4842..253b541c19 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -711,6 +711,7 @@ def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
T1DataProcessing<0b0011>;
// move register
+let isMoveImm = 1 in
def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"mov", "\t$dst, $src",
[(set tGPR:$dst, imm0_255:$src)]>,
@@ -844,7 +845,7 @@ def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iTSTr,
"tst", "\t$lhs, $rhs",
- [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>,
+ [(ARMcmpZ (and_su tGPR:$lhs, tGPR:$rhs), 0)]>,
T1DataProcessing<0b1000>;
// zero-extend byte
@@ -877,6 +878,7 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
"mov", "\t$dst, $rhs", []>,
T1Special<{1,0,?,?}>;
+let isMoveImm = 1 in
def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
"mov", "\t$dst, $rhs", []>,
T1General<{1,0,0,?,?}>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index fd7fb44149..ace133339e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1591,7 +1591,8 @@ def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
}
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
"mov", ".w\t$Rd, $imm",
[(set rGPR:$Rd, t2_so_imm:$imm)]> {
@@ -1603,7 +1604,7 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
[(set rGPR:$Rd, imm0_65535:$imm)]> {
@@ -2519,10 +2520,10 @@ def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
defm t2TST : T2I_cmp_irs<0b0000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
- BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
@@ -2541,6 +2542,7 @@ def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
let Inst{7-4} = 0b0000;
}
+let isMoveImm = 1 in
def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
IIC_iCMOVi, "mov", ".w\t$dst, $true",
[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2553,6 +2555,7 @@ def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
let Inst{15} = 0;
}
+let isMoveImm = 1 in
def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm:$imm),
IIC_iCMOVi,
"movw", "\t$Rd, $imm", []>,
@@ -2573,10 +2576,12 @@ def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm:$imm),
let Inst{7-0} = imm{7-0};
}
+let isMoveImm = 1 in
def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
(ins rGPR:$false, i32imm:$src, pred:$p),
IIC_iCMOVix2, "", []>, RegConstraint<"$false = $dst">;
+let isMoveImm = 1 in
def t2MVNCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
IIC_iCMOVi, "mvn", ".w\t$dst, $true",
[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm_not:$true,
@@ -3014,24 +3019,10 @@ def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
// Non-Instruction Patterns
//
-// Two piece so_imms.
-def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
- (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
- (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
- (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
- (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
- (t2_so_neg_imm2part_2 imm:$RHS))>;
-
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1 in
+let isReMaterializable = 1, isMoveImm = 1 in
def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
"", [(set rGPR:$dst, (i32 imm:$src))]>,
Requires<[IsThumb, HasV6T2]>;
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 461150f06d..50c638b739 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -27,9 +27,8 @@ tailrecurse: ; preds = %sw.bb, %entry
; ARM-NEXT: beq
; THUMB: movs r5, #3
-; THUMB-NEXT: mov r6, r4
-; THUMB-NEXT: ands r6, r5
-; THUMB-NEXT: tst r4, r5
+; THUMB-NEXT: ands r5, r4
+; THUMB-NEXT: cmp r5, #0
; THUMB-NEXT: beq
; T2: ands r12, r12, #3
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 21bc5fa827..5dabfc3a82 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -8,7 +8,8 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; ARM: movgt r0, r1
; T2: t1:
-; T2: sub.w r0, r1, #-2147483648
+; T2: mvn r0, #-2147483648
+; T2: add r0, r1
; T2: movgt r0, r1
%tmp1 = icmp sgt i32 %c, 10
%tmp2 = select i1 %tmp1, i32 0, i32 2147483647
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 14d04a4d8f..37a15ff7b5 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -77,10 +77,49 @@ bb2:
}
; CHECK-NOT: LCPI1_0:
-; CHECK: .subsections_via_symbols
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+; rdar://8241368
+; isel should not fold immediate into eor's which would have prevented LICM.
+define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone {
+; CHECK: t3:
+bb.nph:
+; CHECK: bb.nph
+; CHECK: movw {{(r[0-9])|(lr)}}, #32768
+; CHECK: movs {{(r[0-9])|(lr)}}, #8
+; CHECK: movw [[REGISTER:(r[0-9])|(lr)]], #16386
+; CHECK: movw {{(r[0-9])|(lr)}}, #65534
+; CHECK: movt {{(r[0-9])|(lr)}}, #65535
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+; CHECK: bb
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
+; CHECK: eor.w
+; CHECK-NOT: eor
+; CHECK: and
+ %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
+ %crc_addr.112 = phi i16 [ %crc, %bb.nph ], [ %crc_addr.2, %bb ] ; <i16> [#uses=3]
+ %i.011 = phi i8 [ 0, %bb.nph ], [ %7, %bb ] ; <i8> [#uses=1]
+ %0 = trunc i16 %crc_addr.112 to i8 ; <i8> [#uses=1]
+ %1 = xor i8 %data_addr.013, %0 ; <i8> [#uses=1]
+ %2 = and i8 %1, 1 ; <i8> [#uses=1]
+ %3 = icmp eq i8 %2, 0 ; <i1> [#uses=2]
+ %4 = xor i16 %crc_addr.112, 16386 ; <i16> [#uses=1]
+ %crc_addr.0 = select i1 %3, i16 %crc_addr.112, i16 %4 ; <i16> [#uses=1]
+ %5 = lshr i16 %crc_addr.0, 1 ; <i16> [#uses=2]
+ %6 = or i16 %5, -32768 ; <i16> [#uses=1]
+ %crc_addr.2 = select i1 %3, i16 %5, i16 %6 ; <i16> [#uses=2]
+ %7 = add i8 %i.011, 1 ; <i8> [#uses=2]
+ %8 = lshr i8 %data_addr.013, 1 ; <i8> [#uses=1]
+ %exitcond = icmp eq i8 %7, 8 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb8, label %bb
+
+bb8: ; preds = %bb
+ ret i16 %crc_addr.2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 1dc3614993..adb6dde2c7 100644
--- a/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -53,7 +53,7 @@ define i32 @t2_const_var2_2_ok_1(i32 %lhs) {
define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_2:
-;CHECK: add.w r0, r0, #-1426063360
+;CHECK: add.w r0, r0, #2868903936
;CHECK: add.w r0, r0, #47616
%ret = add i32 %lhs, 2868951552 ; 0xab00ba00
ret i32 %ret
@@ -61,7 +61,7 @@ define i32 @t2_const_var2_2_ok_2(i32 %lhs) {
define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_3:
-;CHECK: add.w r0, r0, #-1426019584
+;CHECK: add.w r0, r0, #2868947712
;CHECK: adds r0, #16
%ret = add i32 %lhs, 2868947728 ; 0xab00ab10
ret i32 %ret
@@ -69,7 +69,7 @@ define i32 @t2_const_var2_2_ok_3(i32 %lhs) {
define i32 @t2_const_var2_2_ok_4(i32 %lhs) {
;CHECK: t2_const_var2_2_ok_4:
-;CHECK: add.w r0, r0, #-1426019584
+;CHECK: add.w r0, r0, #2868947712
;CHECK: add.w r0, r0, #1048592
%ret = add i32 %lhs, 2869996304 ; 0xab10ab10
ret i32 %ret
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index 56cb1f6fb4..ceefabbbfa 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -2,8 +2,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
-; CHECK: sub.w r0, r1, #-2147483648
-; CHECK: subs r0, #1
+; CHECK: mvn r0, #-2147483648
+; CHECK: add r0, r1
; CHECK: cmp r2, #10
; CHECK: it gt
; CHECK: movgt r0, r1
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 575852b1ad..a28b1d58d7 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -286,6 +286,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
isBranch = R->getValueAsBit("isBranch");
isIndirectBranch = R->getValueAsBit("isIndirectBranch");
isCompare = R->getValueAsBit("isCompare");
+ isMoveImm = R->getValueAsBit("isMoveImm");
isBarrier = R->getValueAsBit("isBarrier");
isCall = R->getValueAsBit("isCall");
canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 9cd23e6fa8..d58bfb1296 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -213,6 +213,7 @@ namespace llvm {
bool isBranch;
bool isIndirectBranch;
bool isCompare;
+ bool isMoveImm;
bool isBarrier;
bool isCall;
bool canFoldAsLoad;
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index e04ab6c4ef..2b684bede3 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -271,6 +271,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isBranch) OS << "|(1<<TID::Branch)";
if (Inst.isIndirectBranch) OS << "|(1<<TID::IndirectBranch)";
if (Inst.isCompare) OS << "|(1<<TID::Compare)";
+ if (Inst.isMoveImm) OS << "|(1<<TID::MoveImm)";
if (Inst.isBarrier) OS << "|(1<<TID::Barrier)";
if (Inst.hasDelaySlot) OS << "|(1<<TID::DelaySlot)";
if (Inst.isCall) OS << "|(1<<TID::Call)";