blob: b597d8a3477494618672419e58cc5b172af22806 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
#include <stdio.h>
#include <xmmintrin.h>
/*
 * Reads one __m128 value from the memory that __p points to, by casting the
 * float pointer to an __m128 pointer and dereferencing it.
 *
 * NOTE(review): <xmmintrin.h>, included above, normally provides its own
 * _mm_load_ps; this local copy presumably mirrors that definition for
 * illustration and may clash with it at compile time -- confirm whether it
 * should be kept.
 * NOTE(review): dereferencing through __m128 * presumably requires __p to be
 * 16-byte aligned -- confirm against the intrinsic's documentation.
 */
static __inline__ __m128 __attribute__((__always_inline__))
_mm_load_ps(const float *__p)
{
/* The cast drops the const qualifier of __p; the pointee is only read. */
return *(__m128*)__p;
}
/*
 * Returns the arithmetic mean of the first len floats in src.
 *
 * Elements are accumulated four at a time in an SSE register. When len is
 * not a multiple of 4, the remaining one to three elements are added one by
 * one, so nothing beyond src[len - 1] is ever read.
 *
 * src: input values; no particular alignment is required.
 * len: number of elements to average. For len <= 0 there is nothing to
 *      average and 0.0f is returned.
 */
float simdAverage(float *src, int len) {
    if (len <= 0) {
        return 0.0f;
    }

    /* Largest multiple of 4 that does not exceed len. */
    const int vec_len = len - (len % 4);

    __m128 sumx4 = _mm_setzero_ps();
    for (int i = 0; i < vec_len; i += 4) {
        /* Unaligned load: callers are not required to 16-byte align src. */
        sumx4 = _mm_add_ps(sumx4, _mm_loadu_ps(src + i));
    }

    /* Spill the four partial sums; a plain float array is only 4-byte
     * aligned by guarantee, hence the unaligned store. */
    float lanes[4];
    _mm_storeu_ps(lanes, sumx4);
    float total = (lanes[0] + lanes[1] +
                   lanes[2] + lanes[3]);

    /* Scalar tail for the elements that did not fill a whole vector. */
    for (int i = vec_len; i < len; ++i) {
        total += src[i];
    }

    return total / len;
}
/*
 * Writes 0.1 * index into each of the first len elements of src.
 * The product is computed in double precision and narrowed to float by the
 * assignment. Nothing is written when len <= 0.
 */
void initArray(float *src, int len) {
    float *out = src;
    int idx = 0;

    while (idx < len) {
        *out = 0.1 * idx;
        ++out;
        ++idx;
    }
}
/*
 * Fills a 100000-element buffer with initArray, averages it with
 * simdAverage, and prints the result with one decimal place.
 */
int main(void) {
    enum { LEN = 100000 };

    /*
     * A compile-time size with static storage avoids a large variable-length
     * array on the stack. The explicit 16-byte alignment keeps the buffer
     * valid for aligned SSE loads.
     */
    static float src[LEN] __attribute__((aligned(16)));

    initArray(src, LEN);
    const float result = simdAverage(src, LEN);
    printf("averagex4 result: %.1f\n", result);
    return 0;
}
|