diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 17 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 127 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 62 |
3 files changed, 161 insertions, 45 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0a41d6d957..ce4a2c9068 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4293,28 +4293,11 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, /// operands. static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } - - // fold (add (arm_neon_vabd a, b) c) -> (arm_neon_vaba c, a, b) - EVT VT = N->getValueType(0); - if (N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && VT.isInteger()) { - unsigned IntNo = cast<ConstantSDNode>(N0.getOperand(0))->getZExtValue(); - if (IntNo == Intrinsic::arm_neon_vabds) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT, - DAG.getConstant(Intrinsic::arm_neon_vabas, MVT::i32), - N1, N0.getOperand(1), N0.getOperand(2)); - if (IntNo == Intrinsic::arm_neon_vabdu) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT, - DAG.getConstant(Intrinsic::arm_neon_vabau, MVT::i32), - N1, N0.getOperand(1), N0.getOperand(2)); - } - return SDValue(); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index fb51a2f873..8c8d1d7b8a 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1288,6 +1288,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))))]>; +// Neon Intrinsic-Op instructions (VABA): double- and quad-register. +class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set DPR:$dst, (Ty (OpNode DPR:$src1, + (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>; +class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (Ty (OpNode QPR:$src1, + (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>; + // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1342,6 +1360,17 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, (TyD (NEONvduplane (TyD DPR_8:$src3), imm:$lane))))))]>; +// Long Intrinsic-Op vector operations with explicit extend (VABAL). +class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2), + (TyD DPR:$src3)))))))]>; // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. @@ -1433,6 +1462,19 @@ class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } +// Long 3-register intrinsics with explicit extend (VABDL). +class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1), + (TyD DPR:$src2))))))]> { + let isCommutable = Commutable; +} + // Long 3-register intrinsics. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -1918,6 +1960,21 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v8i16, v8i8, IntOp, Commutable>; } +// ....with explicit extend (VABDL). +multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, IntOp, ExtOp, Commutable>; + def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, IntOp, ExtOp, Commutable>; + def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, IntOp, ExtOp, Commutable>; +} + // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: @@ -1975,6 +2032,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, mul, ShOp>; } +// Neon Intrinsic-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, Intrinsic IntOp, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; + def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; + def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; + + // 128-bit vector types. + def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; + def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; + def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; +} + // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, @@ -2050,6 +2130,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } +// ....with explicit extend (VABAL). +multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, + IntOp, ExtOp, OpNode>; + def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, + IntOp, ExtOp, OpNode>; + def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, + IntOp, ExtOp, OpNode>; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -2765,32 +2860,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", int_arm_neon_vabds, 0>; + "vabd", "s", int_arm_neon_vabds, 1>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", int_arm_neon_vabdu, 0>; + "vabd", "u", int_arm_neon_vabdu, 1>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, - "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, - "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "s", int_arm_neon_vabds, zext, 1>; +defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabds, add>; +defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabdu, add>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, + "vabal", "s", int_arm_neon_vabds, zext, add>; +defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, + "vabal", "u", int_arm_neon_vabdu, zext, add>; // Vector Maximum and Minimum. diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index e625d413f5..fd64460d6f 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -81,21 +81,21 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) { if (((Name.compare(14, 5, "vmovl", 5) == 0 || Name.compare(14, 5, "vaddl", 5) == 0 || - Name.compare(14, 5, "vsubl", 5) == 0) && - (Name.compare(19, 2, "s.", 2) == 0 || - Name.compare(19, 2, "u.", 2) == 0)) || - - ((Name.compare(14, 5, "vaddw", 5) == 0 || - Name.compare(14, 5, "vsubw", 5) == 0) && - (Name.compare(19, 2, "s.", 2) == 0 || - Name.compare(19, 2, "u.", 2) == 0)) || - - ((Name.compare(14, 5, "vmull", 5) == 0 || + Name.compare(14, 5, "vsubl", 5) == 0 || + Name.compare(14, 5, "vaddw", 5) == 0 || + Name.compare(14, 5, "vsubw", 5) == 0 || + Name.compare(14, 5, "vmull", 5) == 0 || Name.compare(14, 5, "vmlal", 5) == 0 || - Name.compare(14, 5, "vmlsl", 5) == 0) && + Name.compare(14, 5, "vmlsl", 5) == 0 || + Name.compare(14, 5, "vabdl", 5) == 0 || + Name.compare(14, 5, "vabal", 5) == 0) && (Name.compare(19, 2, "s.", 2) == 0 || Name.compare(19, 2, "u.", 2) == 0)) || + (Name.compare(14, 4, "vaba", 4) == 0 && + (Name.compare(18, 2, "s.", 2) == 0 || + Name.compare(18, 2, "u.", 2) == 0)) || + (Name.compare(14, 6, "vmovn.", 6) == 0)) { // Calls to these are transformed into IR without intrinsics. @@ -391,6 +391,35 @@ static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1, } } +/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba, +/// or vabal intrinsics, construct a call to a vabd intrinsic. Examine the +/// name of the old intrinsic to determine whether to use a signed or unsigned +/// vabd intrinsic. Get the type from the old call instruction, adjusted for +/// half-size vector elements if the old intrinsic was vabdl or vabal. +static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) { + Function *F = CI->getCalledFunction(); + const std::string& Name = F->getName(); + bool isLong = (Name.at(18) == 'l'); + bool isSigned = (Name.at(isLong ? 19 : 18) == 's'); + + Intrinsic::ID intID; + if (isSigned) + intID = Intrinsic::arm_neon_vabds; + else + intID = Intrinsic::arm_neon_vabdu; + + const Type *Ty = CI->getType(); + if (isLong) + Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty)); + + Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1); + Value *Operands[2]; + Operands[0] = Arg0; + Operands[1] = Arg1; + return CallInst::Create(VABD, Operands, Operands+2, + "upgraded."+CI->getName(), CI); +} + // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the // upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. @@ -434,6 +463,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI); NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI, "upgraded."+CI->getName(), CI); + } else if (Name.compare(14, 4, "vabd", 4) == 0) { + NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1)); + NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI); + } else if (Name.compare(14, 4, "vaba", 4) == 0) { + NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2)); + if (Name.at(18) == 'l') + NewI = new ZExtInst(NewI, CI->getType(), "", CI); + NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI, + "upgraded."+CI->getName(), CI); } else if (Name.compare(14, 6, "vmovn.", 6) == 0) { NewI = new TruncInst(CI->getArgOperand(0), CI->getType(), "upgraded." + CI->getName(), CI); @@ -675,7 +713,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } switch (NewFn->getIntrinsicID()) { - default: llvm_unreachable("Unknown function for CallInst upgrade."); + default: llvm_unreachable("Unknown function for CallInst upgrade."); case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: |