diff options
author | Nate Begeman <natebegeman@mac.com> | 2010-06-10 00:17:56 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2010-06-10 00:17:56 +0000 |
commit | d075c01c359b9cc120c3accc7166990f9f4ac423 (patch) | |
tree | 07d3885554dc6f769d80156bea8511920421668b /lib | |
parent | c3926645d70842eae22641df1bf69da457a0ff11 (diff) |
support _lane ops, and multiplies by scalar.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@105770 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 28 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenFunction.h | 3 | ||||
-rw-r--r-- | lib/Headers/arm_neon.td | 75 |
3 files changed, 63 insertions, 43 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 3e8fec5b35..dbf5352028 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -894,13 +894,24 @@ const llvm::Type *GetNeonType(LLVMContext &Ctx, unsigned type, bool q) { return 0; } +Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { + unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); + SmallVector<Constant*, 16> Indices(nElts, C); + Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size()); + return Builder.CreateShuffleVector(V, V, SV, "lane"); +} + Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, - const char *name) { + const char *name, bool splat) { unsigned j = 0; for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ++ai, ++j) Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); + if (splat) { + Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j])); + Ops.resize(j); + } return Builder.CreateCall(F, Ops.begin(), Ops.end(), name); } @@ -917,9 +928,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, a, b); } - llvm::SmallVector<Value*, 4> Ops; // Determine the type of this overloaded NEON intrinsic. assert(BuiltinID > ARM::BI__builtin_thread_pointer); + + llvm::SmallVector<Value*, 4> Ops; for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) Ops.push_back(EmitScalarExpr(E->getArg(i))); @@ -931,11 +943,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, unsigned type = Result.getZExtValue(); bool usgn = type & 0x08; bool quad = type & 0x10; + bool splat = false; const llvm::Type *Ty = GetNeonType(VMContext, type & 0x7, quad); if (!Ty) return 0; + // FIXME: multiplies by scalar do not currently match their patterns because + // they are implemented via mul(splat(scalar_to_vector)) rather than + // mul(dup(scalar)) + unsigned Int; switch (BuiltinID) { default: return 0; @@ -1087,12 +1104,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case ARM::BI__builtin_neon_vminq_v: Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins; return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin"); - // FIXME: vmlal_lane -> splat, drop imm + case ARM::BI__builtin_neon_vmlal_lane_v: + splat = true; case ARM::BI__builtin_neon_vmlal_v: Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals; - return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal"); - // FIXME: vmlal_n, vmla_n, vmlsl_n, vmls_n, vmull_n, vmul_n, - // vqdmlal_n, vqdmlsl_n, vqdmulh_n, vqdmull_n, vqrdmulh_n -> splat,-_n + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat); case ARM::BI__builtin_neon_vmovl_v: Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls; return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl"); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 0682a0474b..50e334061e 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1147,7 +1147,8 @@ public: llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitNeonCall(llvm::Function *F, llvm::SmallVectorImpl<llvm::Value*> &O, - const char *name); + const char *name, bool splat = false); + llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td index 4dbecb66b9..fb298a67c6 100644 --- a/lib/Headers/arm_neon.td +++ b/lib/Headers/arm_neon.td @@ -14,29 +14,32 @@ class Op; -def OP_NONE : Op; -def OP_ADD : Op; -def OP_SUB : Op; -def OP_MUL : Op; -def OP_MLA : Op; -def OP_MLS : Op; -def OP_EQ : Op; -def OP_GE : Op; -def OP_LE : Op; -def OP_GT : Op; -def OP_LT : Op; -def OP_NEG : Op; -def OP_NOT : Op; -def OP_AND : Op; -def OP_OR : Op; -def OP_XOR : Op; -def OP_ANDN : Op; -def OP_ORN : Op; -def OP_CAST : Op; -def OP_HI : Op; -def OP_LO : Op; -def OP_CONC : Op; -def OP_DUP : Op; +def OP_NONE : Op; +def OP_ADD : Op; +def OP_SUB : Op; +def OP_MUL : Op; +def OP_MLA : Op; +def OP_MLS : Op; +def OP_MUL_N : Op; +def OP_MLA_N : Op; +def OP_MLS_N : Op; +def OP_EQ : Op; +def OP_GE : Op; +def OP_LE : Op; +def OP_GT : Op; +def OP_LT : Op; +def OP_NEG : Op; +def OP_NOT : Op; +def OP_AND : Op; +def OP_OR : Op; +def OP_XOR : Op; +def OP_ANDN : Op; +def OP_ORN : Op; +def OP_CAST : Op; +def OP_HI : Op; +def OP_LO : Op; +def OP_CONC : Op; +def OP_DUP : Op; class Inst <string p, string t, Op o> { string Prototype = p; @@ -48,7 +51,6 @@ class Inst <string p, string t, Op o> { class SInst<string p, string t> : Inst<p, t, OP_NONE> {} class IInst<string p, string t> : Inst<p, t, OP_NONE> {} class WInst<string p, string t> : Inst<p, t, OP_NONE> {} -class BInst<string p, string t> : Inst<p, t, OP_NONE> {} // prototype: return (arg, arg, ...) // v: void @@ -64,6 +66,7 @@ class BInst<string p, string t> : Inst<p, t, OP_NONE> {} // i: constant int // l: constant uint64 // s: scalar of element type +// a: scalar of element type (splat to vector type) // k: default elt width, double num elts // #: array of default vectors // p: pointer type @@ -273,21 +276,21 @@ def VQDMLAL_LANE : SInst<"wwddi", "si">; def VMLS_LANE : IInst<"ddddi", "siUsUifQsQiQUsQUiQf">; def VMLSL_LANE : SInst<"wwddi", "siUsUi">; def VQDMLSL_LANE : SInst<"wwddi", "si">; -def VMUL_N : IInst<"dds", "sifUsUiQsQiQfQUsQUi">; -def VMULL_N : SInst<"wds", "siUsUi">; +def VMUL_N : Inst<"dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; +def VMULL_N : SInst<"wda", "siUsUi">; def VMULL_LANE : SInst<"wddi", "siUsUi">; -def VQDMULL_N : SInst<"wds", "si">; +def VQDMULL_N : SInst<"wda", "si">; def VQDMULL_LANE : SInst<"wddi", "si">; -def VQDMULH_N : SInst<"dds", "siQsQi">; +def VQDMULH_N : SInst<"dda", "siQsQi">; def VQDMULH_LANE : SInst<"dddi", "siQsQi">; -def VQRDMULH_N : SInst<"dds", "siQsQi">; +def VQRDMULH_N : SInst<"dda", "siQsQi">; def VQRDMULH_LANE : SInst<"dddi", "siQsQi">; -def VMLA_N : IInst<"ddds", "siUsUifQsQiQUsQUiQf">; -def VMLAL_N : SInst<"wwds", "siUsUi">; -def VQDMLAL_N : SInst<"wwds", "si">; -def VMLS_N : IInst<"ddds", "siUsUifQsQiQUsQUiQf">; -def VMLSL_N : SInst<"wwds", "siUsUi">; -def VQDMLSL_N : SInst<"wwds", "si">; +def VMLA_N : Inst<"ddda", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; +def VMLAL_N : SInst<"wwda", "siUsUi">; +def VQDMLAL_N : SInst<"wwda", "si">; +def VMLS_N : Inst<"ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; +def VMLSL_N : SInst<"wwda", "siUsUi">; +def VQDMLSL_N : SInst<"wwda", "si">; //////////////////////////////////////////////////////////////////////////////// // E.3.26 Vector Extract @@ -319,7 +322,7 @@ def VORR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; def VEOR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; def VBIC : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; def VORN : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; -def VBSL : BInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; +def VBSL : SInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.30 Transposition operations |