diff options
author | Nate Begeman <natebegeman@mac.com> | 2010-06-08 00:17:19 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2010-06-08 00:17:19 +0000 |
commit | 9eb65a56e18bee1e5392bf2dff01cbd7b895f685 (patch) | |
tree | 629bbd31aeb56cc6698a67100b84fb411bb91f9d | |
parent | 37b6a5731a47f811d754f0d48aa93edf30e30513 (diff) |
Implement ARM NEON up through vcvt, alphabetically.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@105590 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/BuiltinsARM.def | 22 | ||||
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 58 | ||||
-rw-r--r-- | lib/Headers/arm_neon.td | 61 |
3 files changed, 95 insertions, 46 deletions
diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def index 4a8b9ac965..41fa0bf342 100644 --- a/include/clang/Basic/BuiltinsARM.def +++ b/include/clang/Basic/BuiltinsARM.def @@ -48,12 +48,12 @@ BUILTIN(__builtin_neon_vclz_v, "V8cV8ci", "n") BUILTIN(__builtin_neon_vclzq_v, "V16cV16ci", "n") BUILTIN(__builtin_neon_vcnt_v, "V8cV8ci", "n") BUILTIN(__builtin_neon_vcntq_v, "V16cV16ci", "n") -BUILTIN(__builtin_neon_vcombine_v, "V16cV8cV8ci", "n") -BUILTIN(__builtin_neon_vcvt_f16_v, "V8cV8ci", "n") -BUILTIN(__builtin_neon_vcvt_f32_v, "V8cV8ci", "n") -BUILTIN(__builtin_neon_vcvtq_f32_v, "V16cV16ci", "n") -BUILTIN(__builtin_neon_vcvt_n_f32_v, "V8cV8cii", "n") -BUILTIN(__builtin_neon_vcvtq_n_f32_v, "V16cV16cii", "n") +BUILTIN(__builtin_neon_vcvt_f16_v, "V8cV16ci", "n") +BUILTIN(__builtin_neon_vcvt_f32_v, "V2fV8ci", "n") +BUILTIN(__builtin_neon_vcvtq_f32_v, "V4fV16ci", "n") +BUILTIN(__builtin_neon_vcvt_f32_f16, "V16cV8ci", "n") +BUILTIN(__builtin_neon_vcvt_n_f32_v, "V2fV8cii", "n") +BUILTIN(__builtin_neon_vcvtq_n_f32_v, "V4fV16cii", "n") BUILTIN(__builtin_neon_vcvt_n_s32_v, "V8cV8cii", "n") BUILTIN(__builtin_neon_vcvtq_n_s32_v, "V16cV16cii", "n") BUILTIN(__builtin_neon_vcvt_n_u32_v, "V8cV8cii", "n") @@ -62,16 +62,6 @@ BUILTIN(__builtin_neon_vcvt_s32_v, "V8cV8ci", "n") BUILTIN(__builtin_neon_vcvtq_s32_v, "V16cV16ci", "n") BUILTIN(__builtin_neon_vcvt_u32_v, "V8cV8ci", "n") BUILTIN(__builtin_neon_vcvtq_u32_v, "V16cV16ci", "n") -BUILTIN(__builtin_neon_vdup_n_i8, "V8cUc", "n") -BUILTIN(__builtin_neon_vdup_n_i16, "V8cUs", "n") -BUILTIN(__builtin_neon_vdup_n_i32, "V8cUi", "n") -BUILTIN(__builtin_neon_vdup_n_f32, "V8cf", "n") -BUILTIN(__builtin_neon_vdupq_n_i8, "V16cUc", "n") -BUILTIN(__builtin_neon_vdupq_n_i16, "V16cUs", "n") -BUILTIN(__builtin_neon_vdupq_n_i32, "V16cUi", "n") -BUILTIN(__builtin_neon_vdupq_n_f32, "V16cf", "n") -BUILTIN(__builtin_neon_vdup_n_i64, "V8cULLi", "n") -BUILTIN(__builtin_neon_vdupq_n_i64, "V16cULLi", "n") BUILTIN(__builtin_neon_vext_v, "V8cV8cV8cii", "n") BUILTIN(__builtin_neon_vextq_v, "V16cV16cV16cii", "n") BUILTIN(__builtin_neon_vget_high_v, "V8cV16ci", "n") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 409829687a..1f308eb240 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -975,13 +975,69 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *F = CGM.getIntrinsic(Int, &Ty, 1); return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddw"); } - // FIXME: vbsl -> or ((0 & 1), (0 & 2)), impl. with generic ops? + // FIXME: vbsl -> or ((0 & 1), (0 & 2)) in arm_neon.h + case ARM::BI__builtin_neon_vcale_v: + std::swap(Ops[0], Ops[1]); case ARM::BI__builtin_neon_vcage_v: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacged), &Ops[0], &Ops[0] + 2, "vcage"); + case ARM::BI__builtin_neon_vcaleq_v: + std::swap(Ops[0], Ops[1]); case ARM::BI__builtin_neon_vcageq_v: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq), &Ops[0], &Ops[0] + 2, "vcage"); + case ARM::BI__builtin_neon_vcalt_v: + std::swap(Ops[0], Ops[1]); + case ARM::BI__builtin_neon_vcagt_v: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd), + &Ops[0], &Ops[0] + 2, "vcagt"); + case ARM::BI__builtin_neon_vcaltq_v: + std::swap(Ops[0], Ops[1]); + case ARM::BI__builtin_neon_vcagtq_v: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq), + &Ops[0], &Ops[0] + 2, "vcagt"); + case ARM::BI__builtin_neon_vcls_v: + case ARM::BI__builtin_neon_vclsq_v: { + Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vcls"); + } + case ARM::BI__builtin_neon_vclz_v: + case ARM::BI__builtin_neon_vclzq_v: { + Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vclz"); + } + case ARM::BI__builtin_neon_vcnt_v: + case ARM::BI__builtin_neon_vcntq_v: { + Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vcnt"); + } + // FIXME: intrinsics for f16<->f32 convert missing from ARM target. + case ARM::BI__builtin_neon_vcvt_f32_v: + case ARM::BI__builtin_neon_vcvtq_f32_v: { + return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") + : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); + } + case ARM::BI__builtin_neon_vcvt_s32_v: + case ARM::BI__builtin_neon_vcvt_u32_v: + case ARM::BI__builtin_neon_vcvtq_s32_v: + case ARM::BI__builtin_neon_vcvtq_u32_v: { + return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") + : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); + } + case ARM::BI__builtin_neon_vcvt_n_f32_v: + case ARM::BI__builtin_neon_vcvtq_n_f32_v: { + Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp; + Value *F = CGM.getIntrinsic(Int, &Ty, 1); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vcvt_n"); + } + case ARM::BI__builtin_neon_vcvt_n_s32_v: + case ARM::BI__builtin_neon_vcvt_n_u32_v: + case ARM::BI__builtin_neon_vcvtq_n_s32_v: + case ARM::BI__builtin_neon_vcvtq_n_u32_v: { + Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs; + Value *F = CGM.getIntrinsic(Int, &Ty, 1); + return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vcvt_n"); + } } } diff --git a/lib/Headers/arm_neon.td b/lib/Headers/arm_neon.td index 3d760d647d..c8c8ecd3db 100644 --- a/lib/Headers/arm_neon.td +++ b/lib/Headers/arm_neon.td @@ -33,6 +33,8 @@ def OP_XOR : Op; def OP_ANDN : Op; def OP_ORN : Op; def OP_CAST : Op; +def OP_CONC : Op; +def OP_DUP : Op; class Inst <string p, string t, Op o> { string Prototype = p; @@ -49,7 +51,8 @@ class BInst<string p, string t> : Inst<p, t, OP_NONE> {} // prototype: return (arg, arg, ...) // v: void // t: best-fit integer (int/poly args) -// x: unsigned integer (int/float args) +// x: signed integer (int/float args) +// u: unsigned integer (int/float args) // f: float (int args) // d: default // w: double width elements, same num elts @@ -114,16 +117,16 @@ def VRSUBHN : IInst<"dww", "csiUcUsUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.4 Comparison -def VCEQ : Inst<"xdd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; -def VCGE : Inst<"xdd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; -def VCLE : Inst<"xdd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; -def VCGT : Inst<"xdd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; -def VCLT : Inst<"xdd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; -def VCAGE : IInst<"xdd", "fQf">; -def VCALE : IInst<"xdd", "fQf">; -def VCAGT : IInst<"xdd", "fQf">; -def VCALT : IInst<"xdd", "fQf">; -def VTST : WInst<"xdd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc">; +def VCEQ : Inst<"udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>; +def VCGE : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>; +def VCLE : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; +def VCGT : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; +def VCLT : Inst<"udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; +def VCAGE : IInst<"udd", "fQf">; +def VCALE : IInst<"udd", "fQf">; +def VCAGT : IInst<"udd", "fQf">; +def VCALT : IInst<"udd", "fQf">; +def VTST : WInst<"udd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc">; //////////////////////////////////////////////////////////////////////////////// // E.3.5 Absolute Difference @@ -168,10 +171,10 @@ def VRSHR_N : SInst<"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; def VSRA_N : SInst<"dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; def VRSRA_N : SInst<"dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; def VQSHL_N : SInst<"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHLU_N : SInst<"xdi", "csilQcQsQiQl">; +def VQSHLU_N : SInst<"udi", "csilQcQsQiQl">; def VSHRN_N : IInst<"dwi", "csiUcUsUi">; -def VQSHRUN_N : SInst<"xwi", "csi">; -def VQRSHRUN_N : SInst<"xwi", "csi">; +def VQSHRUN_N : SInst<"uwi", "csi">; +def VQRSHRUN_N : SInst<"uwi", "csi">; def VQSHRN_N : SInst<"dwi", "csiUcUsUi">; def VRSHRN_N : IInst<"dwi", "csiUcUsUi">; def VQRSHRN_N : SInst<"dwi", "csiUcUsUi">; @@ -222,12 +225,12 @@ def VCREATE: Inst<"dl", "csihfUcUsUiUlPcPsl", OP_CAST>; //////////////////////////////////////////////////////////////////////////////// // E.3.19 Set all lanes to same value -def VDUP_N : IInst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; +def VDUP_N : Inst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; def VMOV_N : IInst<"ds", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.20 Combining vectors -def VCOMBINE : WInst<"kdd", "csilhfUcUsUiUlPcPs">; +def VCOMBINE : Inst<"kdd", "csilhfUcUsUiUlPcPs", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// // E.3.21 Splitting vectors @@ -236,18 +239,18 @@ def VGET_LOW : WInst<"dk", "csilhfUcUsUiUlPcPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.22 Converting vectors -// FIXME: vmovn, vqmovn, vqmovun have wrong suffixes. -def VCVT_S32 : SInst<"df", "iQi">; -def VCVT_U32 : SInst<"df", "UiQUi">; -def VCVT_F16 : SInst<"df", "h">; -def VCVT_N_S32 : SInst<"dfi", "iQi">; -def VCVT_N_U32 : SInst<"dfi", "UiQUi">; -def VCVT_F32 : SInst<"fd", "iUiQiQUih">; -def VCVT_N_F32 : SInst<"fdi", "iUiQiQUi">; -def VMOVN : IInst<"hk", "silUsUiUl">; -def VMOVL : SInst<"wd", "csiUcUsUi">; -def VQMOVN : SInst<"hk", "silUsUiUl">; -def VQMOVUN : SInst<"ek", "sil">; +def VCVT_S32 : SInst<"xd", "fQf">; +def VCVT_U32 : SInst<"ud", "fQf">; +def VCVT_F16 : SInst<"hk", "f">; +def VCVT_N_S32 : SInst<"xdi", "fQf">; +def VCVT_N_U32 : SInst<"udi", "fQf">; +def VCVT_F32 : SInst<"fd", "iUiQiQUi">; +def VCVT_F32_F16 : SInst<"kh", "f">; +def VCVT_N_F32 : SInst<"fdi", "iUiQiQUi">; +def VMOVN : IInst<"hk", "silUsUiUl">; +def VMOVL : SInst<"wd", "csiUcUsUi">; +def VQMOVN : SInst<"hk", "silUsUiUl">; +def VQMOVUN : SInst<"ek", "sil">; //////////////////////////////////////////////////////////////////////////////// // E.3.23-24 Table lookup, Extended table lookup @@ -314,7 +317,7 @@ def VORR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; def VEOR : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; def VBIC : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; def VORN : Inst<"ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; -def VBSL : BInst<"dxdd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; +def VBSL : BInst<"dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.30 Transposition operations |