-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp     | 31
-rw-r--r--  test/CodeGen/ARM/fast-isel-pred.ll | 60
2 files changed, 85 insertions, 6 deletions
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 26f48b3083..acb448569c 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -123,14 +123,15 @@ class ARMFastISel : public FastISel {
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm);
-    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
-                                    const TargetRegisterClass *RC,
-                                    uint64_t Imm);
     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm);
+    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    uint64_t Imm);
+
     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx);
@@ -193,6 +194,7 @@ class ARMFastISel : public FastISel {
 
     // OptionalDef handling routines.
   private:
+    bool isARMNEONPred(const MachineInstr *MI);
     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
     void AddLoadStoreOperands(EVT VT, Address &Addr,
@@ -221,6 +223,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
   return true;
 }
 
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+
+  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+      AFI->isThumb2Function())
+    return false;
+
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
+    if (TID.OpInfo[i].isPredicate())
+      return true;
+
+  return false;
+}
+
 // If the machine is predicable go ahead and add the predicate operands, if
 // it needs default CC operands add those.
 // TODO: If we want to support thumb1 then we'll need to deal with optional
@@ -230,10 +247,12 @@ const MachineInstrBuilder &
 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   MachineInstr *MI = &*MIB;
 
-  // Do we use a predicate?
-  if (TII.isPredicable(MI))
+  // Do we use a predicate? or...
+  // Are we NEON in ARM mode and have a predicate operand? If so, I know
+  // we're not predicable but add it anyways.
+  if (TII.isPredicable(MI) || isARMNEONPred(MI))
     AddDefaultPred(MIB);
-  
+
   // Do we optionally set a predicate? Preds is size > 0 iff the predicate
   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
   bool CPSR = false;
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
new file mode 100644
index 0000000000..ef436549e8
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -0,0 +1,60 @@
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-apple-darwin"
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  %X = alloca <4 x i32>, align 16
+  %Y = alloca <4 x float>, align 16
+  store i32 0, i32* %retval
+  %tmp = load <4 x i32>* %X, align 16
+  call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
+  %0 = load i32* %retval
+  ret i32 %0
+}
+
+define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %__a.addr.i = alloca <4 x i32>, align 16
+  %v.addr = alloca <4 x i32>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  store <4 x i32> %v, <4 x i32>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load <4 x i32>* %v.addr, align 16
+  store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
+  %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+  %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
+  %1 = bitcast <16 x i8> %0 to <4 x i32>
+  %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %tmp3 = load <4 x float>** %constants.addr, align 4
+  call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
+  ret void
+}
+
+define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %v.addr = alloca <4 x float>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  %data = alloca i64, align 4
+  store <4 x float> %v, <4 x float>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load i64* %data, align 4
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
+  %0 = bitcast i8* %add.ptr to i64*
+  %arrayidx = getelementptr inbounds i64* %0, i32 0
+  store i64 %tmp, i64* %arrayidx
+  ret void
+}
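
Note: the functional change above is the isARMNEONPred() check in AddOptionalDefs(). ARM-mode NEON instructions carry predicate operands in their operand list even though they are not marked predicable, so fast-isel must still append the default predicate when it builds them. As a rough sketch (the real helper lives in ARMBaseInstrInfo.h; this approximation is for illustration only), AddDefaultPred() appends an "always" condition code and a null predicate register to the instruction being built:

// Sketch only -- approximates the AddDefaultPred() helper used in the patch.
// The actual definition is in ARMBaseInstrInfo.h.
static inline const MachineInstrBuilder &
AddDefaultPred(const MachineInstrBuilder &MIB) {
  // A predicate is a condition-code immediate (ARMCC::AL == always) followed
  // by the predicate register, which is 0 (no register) for an unconditional
  // instruction.
  return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
}

With this change, ARM-mode NEON instructions emitted through the FastEmitInst_* helpers (for example, the conversion and vector moves generated for the sitofp/bitcast sequence in the test above) receive the same default predicate operands as ordinary predicable instructions.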