1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
MSTRINGIFY(
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
__kernel void
ComputeBoundsKernel(
const int numNodes,
const int numSoftBodies,
__global int * g_vertexClothIdentifier,
__global float4 * g_vertexPositions,
/* Unfortunately, to get the atomics below to work these arrays cannot be */
/* uint4, though that is the layout of the data */
/* Therefore this is little-endian-only code */
volatile __global uint * g_clothMinBounds,
volatile __global uint * g_clothMaxBounds,
volatile __local uint * clothMinBounds,
volatile __local uint * clothMaxBounds)
{
// Init min and max bounds arrays
if( get_local_id(0) < numSoftBodies )
{
clothMinBounds[get_local_id(0)*4] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
clothMaxBounds[get_local_id(0)*4] = 0;
clothMaxBounds[get_local_id(0)*4+1] = 0;
clothMaxBounds[get_local_id(0)*4+2] = 0;
clothMaxBounds[get_local_id(0)*4+3] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
int nodeID = get_global_id(0);
if( nodeID < numNodes )
{
int clothIdentifier = g_vertexClothIdentifier[nodeID];
if( clothIdentifier >= 0 )
{
float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
/* Reinterpret position as uint */
uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
// Min/max with the LDS values
atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
/* Use global atomics to update the global versions of the data */
if( get_local_id(0) < numSoftBodies )
{
/*atom_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
atom_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
atom_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
atom_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
}
}
);
|