diff options
author | Craig Topper <craig.topper@gmail.com> | 2012-01-25 04:26:17 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2012-01-25 04:26:17 +0000 |
commit | 2ee2ac2293f313dfe1c6eb7034527a92b5d23158 (patch) | |
tree | 773f7e8cdca0be1cd71b5a1c4fc9130879509686 | |
parent | 00b465747138ec5c00e1d7568d2eb88c6db6042d (diff) |
Represent 256-bit unaligned loads natively and remove the builtins. A similar change was made for the 128-bit versions a while back.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148919 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/BuiltinsX86.def | 3 | ||||
-rw-r--r-- | lib/Headers/avxintrin.h | 15 | ||||
-rw-r--r-- | test/CodeGen/avx-builtins.c | 25 |
3 files changed, 37 insertions, 6 deletions
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 17a7cc63e1..2356b54d3c 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -453,11 +453,8 @@ BUILTIN(__builtin_ia32_vbroadcastsd256, "V4ddC*", "") BUILTIN(__builtin_ia32_vbroadcastss256, "V8ffC*", "") BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "") BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "") -BUILTIN(__builtin_ia32_loadupd256, "V4ddC*", "") -BUILTIN(__builtin_ia32_loadups256, "V8ffC*", "") BUILTIN(__builtin_ia32_storeupd256, "vd*V4d", "") BUILTIN(__builtin_ia32_storeups256, "vf*V8f", "") -BUILTIN(__builtin_ia32_loaddqu256, "V32ccC*", "") BUILTIN(__builtin_ia32_storedqu256, "vc*V32c", "") BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "") BUILTIN(__builtin_ia32_movntdq256, "vV4LLi*V4LLi", "") diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h index 620ee0fbb0..ce4b2264bf 100644 --- a/lib/Headers/avxintrin.h +++ b/lib/Headers/avxintrin.h @@ -751,13 +751,19 @@ _mm256_load_ps(float const *p) static __inline __m256d __attribute__((__always_inline__, __nodebug__)) _mm256_loadu_pd(double const *p) { - return (__m256d)__builtin_ia32_loadupd256(p); + struct __loadu_pd { + __m256d v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_pd*)p)->v; } static __inline __m256 __attribute__((__always_inline__, __nodebug__)) _mm256_loadu_ps(float const *p) { - return (__m256)__builtin_ia32_loadups256(p); + struct __loadu_ps { + __m256 v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_ps*)p)->v; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) @@ -769,7 +775,10 @@ _mm256_load_si256(__m256i const *p) static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_loadu_si256(__m256i const *p) { - return (__m256i)__builtin_ia32_loaddqu256((char const *)p); + struct __loadu_si256 { + __m256i v; + } __attribute__((packed, may_alias)); + return 
((struct __loadu_si256*)p)->v; } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) diff --git a/test/CodeGen/avx-builtins.c b/test/CodeGen/avx-builtins.c new file mode 100644 index 0000000000..f03141225c --- /dev/null +++ b/test/CodeGen/avx-builtins.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s + +// Don't include mm_malloc.h, it's system specific. +#define __MM_MALLOC_H + +#include <immintrin.h> + +// +// Test LLVM IR codegen of shuffle instructions +// + +__m256 test__mm256_loadu_ps(void* p) { + // CHECK: load <8 x float>* %{{.*}}, align 1 + return _mm256_loadu_ps(p); +} + +__m256d test__mm256_loadu_pd(void* p) { + // CHECK: load <4 x double>* %{{.*}}, align 1 + return _mm256_loadu_pd(p); +} + +__m256i test__mm256_loadu_si256(void* p) { + // CHECK: load <4 x i64>* %0, align 1 + return _mm256_loadu_si256(p); +} |