support _lane ops, and multiplies by scalar.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@105770 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nate Begeman <natebegeman@mac.com> 2010-06-10 00:17:56 +0000
committer: Nate Begeman <natebegeman@mac.com> 2010-06-10 00:17:56 +0000
commit: d075c01c359b9cc120c3accc7166990f9f4ac423 (patch)
tree: 07d3885554dc6f769d80156bea8511920421668b /lib
parent: c3926645d70842eae22641df1bf69da457a0ff11 (diff)
3 files changed, 63 insertions, 43 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 3e8fec5b35..dbf5352028 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -894,13 +894,24 @@ const llvm::Type *GetNeonType(LLVMContext &Ctx, unsigned type, bool q) {
   return 0;
 }
 
+Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
+  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
+  SmallVector<Constant*, 16> Indices(nElts, C);
+  Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+  return Builder.CreateShuffleVector(V, V, SV, "lane");
+}
+
 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
-                                     const char *name) {
+                                     const char *name, bool splat) {
   unsigned j = 0;
   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
        ai != ae; ++ai, ++j)
     Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
 
+  if (splat) {
+    Ops[j-1] = EmitNeonSplat(Ops[j-1], cast<Constant>(Ops[j]));
+    Ops.resize(j);
+  }
   return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
 }
 
@@ -917,9 +928,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                a, b);
   }
 
-  llvm::SmallVector<Value*, 4> Ops;
   // Determine the type of this overloaded NEON intrinsic.
   assert(BuiltinID > ARM::BI__builtin_thread_pointer);
+
+  llvm::SmallVector<Value*, 4> Ops;
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
     Ops.push_back(EmitScalarExpr(E->getArg(i)));
 
@@ -931,11 +943,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   unsigned type = Result.getZExtValue();
   bool usgn = type & 0x08;
   bool quad = type & 0x10;
+  bool splat = false;
 
   const llvm::Type *Ty = GetNeonType(VMContext, type & 0x7, quad);
   if (!Ty)
     return 0;
 
+  // FIXME: multiplies by scalar do not currently match their patterns because
+  //   they are implemented via mul(splat(scalar_to_vector)) rather than
+  //   mul(dup(scalar))
+
   unsigned Int;
   switch (BuiltinID) {
   default: return 0;
@@ -1087,12 +1104,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   case ARM::BI__builtin_neon_vminq_v:
     Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
     return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmin");
-  // FIXME: vmlal_lane -> splat, drop imm
+  case ARM::BI__builtin_neon_vmlal_lane_v:
+    splat = true;
   case ARM::BI__builtin_neon_vmlal_v:
     Int = usgn ? Intrinsic::arm_neon_vmlalu : Intrinsic::arm_neon_vmlals;
-    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal");
-  // FIXME: vmlal_n, vmla_n, vmlsl_n, vmls_n, vmull_n, vmul_n,
-  //        vqdmlal_n, vqdmlsl_n, vqdmulh_n, vqdmull_n, vqrdmulh_n -> splat,-_n
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmlal", splat);
   case ARM::BI__builtin_neon_vmovl_v:
     Int = usgn ? Intrinsic::arm_neon_vmovlu : Intrinsic::arm_neon_vmovls;
     return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vmovl");
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 0682a0474b..50e334061e 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -1147,7 +1147,8 @@ public:
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitNeonCall(llvm::Function *F, 
                             llvm::SmallVectorImpl<llvm::Value*> &O,
-                            const char *name);
+                            const char *name, bool splat = false);
+  llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
   
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td
index 4dbecb66b9..fb298a67c6 100644
--- a/lib/Headers/arm_neon.td
+++ b/lib/Headers/arm_neon.td
@@ -14,29 +14,32 @@
 
 class Op;
 
-def OP_NONE : Op;
-def OP_ADD  : Op;
-def OP_SUB  : Op;
-def OP_MUL  : Op;
-def OP_MLA  : Op;
-def OP_MLS  : Op;
-def OP_EQ   : Op;
-def OP_GE   : Op;
-def OP_LE   : Op;
-def OP_GT   : Op;
-def OP_LT   : Op;
-def OP_NEG  : Op;
-def OP_NOT  : Op;
-def OP_AND  : Op;
-def OP_OR   : Op;
-def OP_XOR  : Op;
-def OP_ANDN : Op;
-def OP_ORN  : Op;
-def OP_CAST : Op;
-def OP_HI   : Op;
-def OP_LO   : Op;
-def OP_CONC : Op;
-def OP_DUP  : Op;
+def OP_NONE  : Op;
+def OP_ADD   : Op;
+def OP_SUB   : Op;
+def OP_MUL   : Op;
+def OP_MLA   : Op;
+def OP_MLS   : Op;
+def OP_MUL_N : Op;
+def OP_MLA_N : Op;
+def OP_MLS_N : Op;
+def OP_EQ    : Op;
+def OP_GE    : Op;
+def OP_LE    : Op;
+def OP_GT    : Op;
+def OP_LT    : Op;
+def OP_NEG   : Op;
+def OP_NOT   : Op;
+def OP_AND   : Op;
+def OP_OR    : Op;
+def OP_XOR   : Op;
+def OP_ANDN  : Op;
+def OP_ORN   : Op;
+def OP_CAST  : Op;
+def OP_HI    : Op;
+def OP_LO    : Op;
+def OP_CONC  : Op;
+def OP_DUP   : Op;
 
 class Inst <string p, string t, Op o> {
   string Prototype = p;
@@ -48,7 +51,6 @@ class Inst <string p, string t, Op o> {
 class SInst<string p, string t> : Inst<p, t, OP_NONE> {}
 class IInst<string p, string t> : Inst<p, t, OP_NONE> {}
 class WInst<string p, string t> : Inst<p, t, OP_NONE> {}
-class BInst<string p, string t> : Inst<p, t, OP_NONE> {}
 
 // prototype: return (arg, arg, ...)
 // v: void
@@ -64,6 +66,7 @@ class BInst<string p, string t> : Inst<p, t, OP_NONE> {}
 // i: constant int
 // l: constant uint64
 // s: scalar of element type
+// a: scalar of element type (splat to vector type)
 // k: default elt width, double num elts
 // #: array of default vectors
 // p: pointer type
@@ -273,21 +276,21 @@ def VQDMLAL_LANE  : SInst<"wwddi", "si">;
 def VMLS_LANE     : IInst<"ddddi", "siUsUifQsQiQUsQUiQf">;
 def VMLSL_LANE    : SInst<"wwddi", "siUsUi">;
 def VQDMLSL_LANE  : SInst<"wwddi", "si">;
-def VMUL_N        : IInst<"dds",   "sifUsUiQsQiQfQUsQUi">;
-def VMULL_N       : SInst<"wds",   "siUsUi">;
+def VMUL_N        : Inst<"dds",    "sifUsUiQsQiQfQUsQUi", OP_MUL_N>;
+def VMULL_N       : SInst<"wda",   "siUsUi">;
 def VMULL_LANE    : SInst<"wddi",  "siUsUi">;
-def VQDMULL_N     : SInst<"wds",   "si">;
+def VQDMULL_N     : SInst<"wda",   "si">;
 def VQDMULL_LANE  : SInst<"wddi",  "si">;
-def VQDMULH_N     : SInst<"dds",   "siQsQi">;
+def VQDMULH_N     : SInst<"dda",   "siQsQi">;
 def VQDMULH_LANE  : SInst<"dddi",  "siQsQi">;
-def VQRDMULH_N    : SInst<"dds",   "siQsQi">;
+def VQRDMULH_N    : SInst<"dda",   "siQsQi">;
 def VQRDMULH_LANE : SInst<"dddi",  "siQsQi">;
-def VMLA_N        : IInst<"ddds",  "siUsUifQsQiQUsQUiQf">;
-def VMLAL_N       : SInst<"wwds",  "siUsUi">;
-def VQDMLAL_N     : SInst<"wwds",  "si">;
-def VMLS_N        : IInst<"ddds",  "siUsUifQsQiQUsQUiQf">;
-def VMLSL_N       : SInst<"wwds",  "siUsUi">;
-def VQDMLSL_N     : SInst<"wwds",  "si">;
+def VMLA_N        : Inst<"ddda",   "siUsUifQsQiQUsQUiQf", OP_MLA_N>;
+def VMLAL_N       : SInst<"wwda",  "siUsUi">;
+def VQDMLAL_N     : SInst<"wwda",  "si">;
+def VMLS_N        : Inst<"ddds",   "siUsUifQsQiQUsQUiQf", OP_MLS_N>;
+def VMLSL_N       : SInst<"wwda",  "siUsUi">;
+def VQDMLSL_N     : SInst<"wwda",  "si">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.26 Vector Extract
@@ -319,7 +322,7 @@ def VORR : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>;
 def VEOR : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>;
 def VBIC : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>;
 def VORN : Inst<"ddd",   "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>;
-def VBSL : BInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
+def VBSL : SInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // E.3.30 Transposition operations
author	Nate Begeman <natebegeman@mac.com>	2010-06-10 00:17:56 +0000
committer	Nate Begeman <natebegeman@mac.com>	2010-06-10 00:17:56 +0000
commit	d075c01c359b9cc120c3accc7166990f9f4ac423 (patch)
tree	07d3885554dc6f769d80156bea8511920421668b /lib
parent	c3926645d70842eae22641df1bf69da457a0ff11 (diff)