diff options
author | Manman Ren <mren@apple.com> | 2012-10-11 00:59:55 +0000 |
---|---|---|
committer | Manman Ren <mren@apple.com> | 2012-10-11 00:59:55 +0000 |
commit | 146e5a4a787a2ebfe89a6b74e7c22d850bf1c858 (patch) | |
tree | 8774ff332c6f10959f86969bc5482cc847b4bc56 | |
parent | 4145228b758892afd3545835a4caaea722f20510 (diff) |
X86: add F16C support in Clang
Support the following intrinsics:
_mm_cvtph_ps, _mm256_cvtph_ps, _mm_cvtps_ph, _mm256_cvtps_ph
rdar://12407875
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@165685 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/BuiltinsX86.def | 6 | ||||
-rw-r--r-- | include/clang/Driver/Options.td | 2 | ||||
-rw-r--r-- | lib/Basic/Targets.cpp | 18 | ||||
-rw-r--r-- | lib/Headers/f16cintrin.h | 58 | ||||
-rw-r--r-- | lib/Headers/x86intrin.h | 4 | ||||
-rw-r--r-- | test/CodeGen/f16c-builtins.c | 26 |
6 files changed, 113 insertions, 1 deletion
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 75e6074214..dc850c438c 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -613,6 +613,12 @@ BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iV8iC*V8iV8iIc", "") BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iV4iC*V2LLiV4iIc", "") BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iV4iC*V4LLiV4iIc", "") +// F16C +BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "") +BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "") +BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "") +BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "") + // RDRAND BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "") BUILTIN(__builtin_ia32_rdrand32_step, "UiUi*", "") diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index af5824aa23..b1bb0695ee 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -823,6 +823,7 @@ def mno_popcnt : Flag<"-mno-popcnt">, Group<m_x86_Features_Group>; def mno_fma4 : Flag<"-mno-fma4">, Group<m_x86_Features_Group>; def mno_fma : Flag<"-mno-fma">, Group<m_x86_Features_Group>; def mno_xop : Flag<"-mno-xop">, Group<m_x86_Features_Group>; +def mno_f16c : Flag<"-mno-f16c">, Group<m_x86_Features_Group>; def mno_thumb : Flag<"-mno-thumb">, Group<m_Group>; def marm : Flag<"-marm">, Alias<mno_thumb>; @@ -863,6 +864,7 @@ def mpopcnt : Flag<"-mpopcnt">, Group<m_x86_Features_Group>; def mfma4 : Flag<"-mfma4">, Group<m_x86_Features_Group>; def mfma : Flag<"-mfma">, Group<m_x86_Features_Group>; def mxop : Flag<"-mxop">, Group<m_x86_Features_Group>; +def mf16c : Flag<"-mf16c">, Group<m_x86_Features_Group>; def mips16 : Flag<"-mips16">, Group<m_Group>; def mno_mips16 : Flag<"-mno-mips16">, Group<m_Group>; def mdsp : Flag<"-mdsp">, Group<m_Group>; diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 17f1a615f1..bc32d75d79 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -1360,6 +1360,7 @@ class 
X86TargetInfo : public TargetInfo { bool HasFMA4; bool HasFMA; bool HasXOP; + bool HasF16C; /// \brief Enumeration of all of the X86 CPUs supported by Clang. /// @@ -1506,7 +1507,8 @@ public: : TargetInfo(triple), SSELevel(NoSSE), MMX3DNowLevel(NoMMX3DNow), HasAES(false), HasPCLMUL(false), HasLZCNT(false), HasRDRND(false), HasBMI(false), HasBMI2(false), HasPOPCNT(false), HasSSE4a(false), - HasFMA4(false), HasFMA(false), HasXOP(false), CPU(CK_Generic) { + HasFMA4(false), HasFMA(false), HasXOP(false), HasF16C(false), + CPU(CK_Generic) { BigEndian = false; LongDoubleFormat = &llvm::APFloat::x87DoubleExtended; } @@ -1712,6 +1714,7 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap<bool> &Features) const { Features["fma4"] = false; Features["fma"] = false; Features["xop"] = false; + Features["f16c"] = false; // FIXME: This *really* should not be here. @@ -1922,6 +1925,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features, Features["bmi2"] = true; else if (Name == "popcnt") Features["popcnt"] = true; + else if (Name == "f16c") + Features["f16c"] = true; } else { if (Name == "mmx") Features["mmx"] = Features["3dnow"] = Features["3dnowa"] = false; @@ -1982,6 +1987,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features, Features["fma4"] = Features["xop"] = false; else if (Name == "xop") Features["xop"] = false; + else if (Name == "f16c") + Features["f16c"] = false; } return true; @@ -2053,6 +2060,11 @@ void X86TargetInfo::HandleTargetFeatures(std::vector<std::string> &Features) { continue; } + if (Feature == "f16c") { + HasF16C = true; + continue; + } + assert(Features[i][0] == '+' && "Invalid target feature!"); X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature) .Case("avx2", AVX2) @@ -2261,6 +2273,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasXOP) Builder.defineMacro("__XOP__"); + if (HasF16C) + Builder.defineMacro("__F16C__"); + // Each case falls through to the previous one here. 
switch (SSELevel) { case AVX2: @@ -2344,6 +2359,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("x86_32", PointerWidth == 32) .Case("x86_64", PointerWidth == 64) .Case("xop", HasXOP) + .Case("f16c", HasF16C) .Default(false); } diff --git a/lib/Headers/f16cintrin.h b/lib/Headers/f16cintrin.h new file mode 100644 index 0000000000..2c96952446 --- /dev/null +++ b/lib/Headers/f16cintrin.h @@ -0,0 +1,58 @@ +/*===---- f16cintrin.h - F16C intrinsics ---------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use <f16cintrin.h> directly; include <x86intrin.h> instead." 
+#endif + +#ifndef __F16C__ +# error "F16C instruction is not enabled" +#endif /* __F16C__ */ + +#ifndef __F16CINTRIN_H +#define __F16CINTRIN_H + +typedef float __v8sf __attribute__ ((__vector_size__ (32))); +typedef float __m256 __attribute__ ((__vector_size__ (32))); + +#define _mm_cvtps_ph(a, imm) __extension__ ({ \ + __m128 __a = (a); \ + (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); }) + +#define _mm256_cvtps_ph(a, imm) __extension__ ({ \ + __m256 __a = (a); \ + (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); }) + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtph_ps(__m128i a) +{ + return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtph_ps(__m128i a) +{ + return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)a); +} + +#endif /* __F16CINTRIN_H */ diff --git a/lib/Headers/x86intrin.h b/lib/Headers/x86intrin.h index 556cd011f0..68ce106be3 100644 --- a/lib/Headers/x86intrin.h +++ b/lib/Headers/x86intrin.h @@ -58,6 +58,10 @@ #include <xopintrin.h> #endif +#ifdef __F16C__ +#include <f16cintrin.h> +#endif + // FIXME: LWP #endif /* __X86INTRIN_H */ diff --git a/test/CodeGen/f16c-builtins.c b/test/CodeGen/f16c-builtins.c new file mode 100644 index 0000000000..28430d52f6 --- /dev/null +++ b/test/CodeGen/f16c-builtins.c @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +f16c -emit-llvm -o - | FileCheck %s + +// Don't include mm_malloc.h, it's system specific. 
+#define __MM_MALLOC_H + +#include <x86intrin.h> + +__m128 test_mm_cvtph_ps(__m128i a) { + // CHECK: @llvm.x86.vcvtph2ps.128 + return _mm_cvtph_ps(a); +} + +__m256 test_mm256_cvtph_ps(__m128i a) { + // CHECK: @llvm.x86.vcvtph2ps.256 + return _mm256_cvtph_ps(a); +} + +__m128i test_mm_cvtps_ph(__m128 a) { + // CHECK: @llvm.x86.vcvtps2ph.128 + return _mm_cvtps_ph(a, 0); +} + +__m128i test_mm256_cvtps_ph(__m256 a) { + // CHECK: @llvm.x86.vcvtps2ph.256 + return _mm256_cvtps_ph(a, 0); +} |