aboutsummaryrefslogtreecommitdiff
path: root/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC
diff options
context:
space:
mode:
Diffstat (limited to 'tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC')
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl91
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl80
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl35
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl57
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl34
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl195
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl213
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl55
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl129
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl44
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl40
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl103
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl36
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl26
-rw-r--r--tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl45
15 files changed, 1183 insertions, 0 deletions
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl
new file mode 100644
index 00000000..7204a80c
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ApplyForces.cl
@@ -0,0 +1,91 @@
+MSTRINGIFY(
+
+/*#define float3 float4
+float dot3(float3 a, float3 b)
+{
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+}*/
+
+float3 projectOnAxis( float3 v, float3 a )
+{
+ return (a*dot(v, a));
+}
+
+__kernel void
+ApplyForcesKernel(
+ const uint numNodes,
+ const float solverdt,
+ const float epsilon,
+ __global int * g_vertexClothIdentifier,
+ __global float4 * g_vertexNormal,
+ __global float * g_vertexArea,
+ __global float * g_vertexInverseMass,
+ __global float * g_clothLiftFactor,
+ __global float * g_clothDragFactor,
+ __global float4 * g_clothWindVelocity,
+ __global float4 * g_clothAcceleration,
+ __global float * g_clothMediumDensity,
+ __global float4 * g_vertexForceAccumulator,
+ __global float4 * g_vertexVelocity)
+{
+ unsigned int nodeID = get_global_id(0);
+ if( nodeID < numNodes )
+ {
+ int clothId = g_vertexClothIdentifier[nodeID];
+ float nodeIM = g_vertexInverseMass[nodeID];
+
+ if( nodeIM > 0.0f )
+ {
+ float3 nodeV = g_vertexVelocity[nodeID].xyz;
+ float3 normal = g_vertexNormal[nodeID].xyz;
+ float area = g_vertexArea[nodeID];
+ float3 nodeF = g_vertexForceAccumulator[nodeID].xyz;
+
+ // Read per-cloth values
+ float3 clothAcceleration = g_clothAcceleration[clothId].xyz;
+ float3 clothWindVelocity = g_clothWindVelocity[clothId].xyz;
+ float liftFactor = g_clothLiftFactor[clothId];
+ float dragFactor = g_clothDragFactor[clothId];
+ float mediumDensity = g_clothMediumDensity[clothId];
+
+ // Apply the acceleration to the cloth rather than do this via a force
+ nodeV += (clothAcceleration*solverdt);
+
+ g_vertexVelocity[nodeID] = (float4)(nodeV, 0.f);
+
+ float3 relativeWindVelocity = nodeV - clothWindVelocity;
+ float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
+
+ if( relativeSpeedSquared > epsilon )
+ {
+ // Correct direction of normal relative to wind direction and get dot product
+ normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
+ float dvNormal = dot(normal, relativeWindVelocity);
+ if( dvNormal > 0 )
+ {
+ float3 force = (float3)(0.f, 0.f, 0.f);
+ float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
+ float c1 = c0 * mediumDensity;
+ force += normal * (-c1 * liftFactor);
+ force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
+
+ float dtim = solverdt * nodeIM;
+ float3 forceDTIM = force * dtim;
+
+ float3 nodeFPlusForce = nodeF + force;
+
+ // m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;
+ float3 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
+
+ nodeF = nodeFPlusForce;
+ if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
+ nodeF = nodeFMinus;
+
+ g_vertexForceAccumulator[nodeID] = (float4)(nodeF, 0.0f);
+ }
+ }
+ }
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl
new file mode 100644
index 00000000..15c0cdc6
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/ComputeBounds.cl
@@ -0,0 +1,80 @@
+MSTRINGIFY(
+#pragma OPENCL EXTENSION cl_amd_printf : enable \n
+
+
+
+__kernel void
+ComputeBoundsKernel(
+ int numNodes,
+ int numSoftBodies,
+ __global int * g_vertexClothIdentifier,
+ __global float4 * g_vertexPositions,
+ volatile __global uint * g_clothMinBounds,
+ volatile __global uint * g_clothMaxBounds,
+ volatile __local uint * clothMinBounds,
+ volatile __local uint * clothMaxBounds)
+{
+ // Init min and max bounds arrays
+ if( get_local_id(0) < numSoftBodies )
+ {
+
+ clothMinBounds[get_local_id(0)*4] = UINT_MAX;
+ clothMinBounds[get_local_id(0)*4+1] = UINT_MAX;
+ clothMinBounds[get_local_id(0)*4+2] = UINT_MAX;
+ clothMinBounds[get_local_id(0)*4+3] = UINT_MAX;
+ clothMaxBounds[get_local_id(0)*4] = 0;
+ clothMaxBounds[get_local_id(0)*4+1] = 0;
+ clothMaxBounds[get_local_id(0)*4+2] = 0;
+ clothMaxBounds[get_local_id(0)*4+3] = 0;
+
+ }
+
+
+ barrier(CLK_GLOBAL_MEM_FENCE);
+
+ int nodeID = get_global_id(0);
+ if( nodeID < numNodes )
+ {
+ int clothIdentifier = g_vertexClothIdentifier[get_global_id(0)];
+ if( clothIdentifier >= 0 )
+ {
+ float3 position = g_vertexPositions[get_global_id(0)].xyz;
+
+ /* Reinterpret position as uint */
+ uint3 positionUInt = (uint3)(as_uint(position.x), as_uint(position.y), as_uint(position.z));
+
+ /* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
+ positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
+ positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);
+ positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
+
+ /* Min/max with the LDS values */
+ atomic_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
+ atomic_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
+ atomic_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
+
+ atomic_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
+ atomic_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
+ atomic_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
+ }
+ }
+
+ barrier(CLK_GLOBAL_MEM_FENCE);
+
+
+ /* Use global atomics to update the global versions of the data*/
+ if( get_local_id(0) < numSoftBodies )
+ {
+ /*atomic_min(&(g_clothMinBounds[get_local_id(0)].x), clothMinBounds[get_local_id(0)].x);*/
+ atomic_min(&(g_clothMinBounds[get_local_id(0)*4]), clothMinBounds[get_local_id(0)*4]);
+ atomic_min(&(g_clothMinBounds[get_local_id(0)*4+1]), clothMinBounds[get_local_id(0)*4+1]);
+ atomic_min(&(g_clothMinBounds[get_local_id(0)*4+2]), clothMinBounds[get_local_id(0)*4+2]);
+
+ atomic_max(&(g_clothMaxBounds[get_local_id(0)*4]), clothMaxBounds[get_local_id(0)*4]);
+ atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+1]), clothMaxBounds[get_local_id(0)*4+1]);
+ atomic_max(&(g_clothMaxBounds[get_local_id(0)*4+2]), clothMaxBounds[get_local_id(0)*4+2]);
+ }
+}
+
+
+);
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl
new file mode 100644
index 00000000..4a2c9f2f
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/Integrate.cl
@@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+// Node indices for each link
+
+//#define float3 float4
+
+__kernel void
+IntegrateKernel(
+ const int numNodes,
+ const float solverdt,
+ __global float * g_vertexInverseMasses,
+ __global float4 * g_vertexPositions,
+ __global float4 * g_vertexVelocity,
+ __global float4 * g_vertexPreviousPositions,
+ __global float4 * g_vertexForceAccumulator)
+{
+ int nodeID = get_global_id(0);
+ if( nodeID < numNodes )
+ {
+ float3 position = g_vertexPositions[nodeID].xyz;
+ float3 velocity = g_vertexVelocity[nodeID].xyz;
+ float3 force = g_vertexForceAccumulator[nodeID].xyz;
+ float inverseMass = g_vertexInverseMasses[nodeID];
+
+ g_vertexPreviousPositions[nodeID] = (float4)(position, 0.f);
+ velocity += force * inverseMass * solverdt;
+ position += velocity * solverdt;
+
+ g_vertexForceAccumulator[nodeID] = (float4)(0.f, 0.f, 0.f, 0.0f);
+ g_vertexPositions[nodeID] = (float4)(position, 0.f);
+ g_vertexVelocity[nodeID] = (float4)(velocity, 0.f);
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl
new file mode 100644
index 00000000..4bc614c0
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/OutputToVertexArray.cl
@@ -0,0 +1,57 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+cbuffer OutputToVertexArrayCB : register( b0 )
+{
+ int startNode;
+ int numNodes;
+ int offsetX;
+ int strideX;
+
+ int offsetN;
+ int strideN;
+ int padding1;
+ int padding2;
+};
+
+
+StructuredBuffer<float4> g_nodesx : register( t0 );
+StructuredBuffer<float4> g_nodesn : register( t1 );
+
+RWStructuredBuffer<float> g_vertexBuffer : register( u0 );
+
+
+[numthreads(128, 1, 1)]
+void
+OutputToVertexArrayKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+ int nodeID = DTid.x;
+ if( nodeID < numNodes )
+ {
+ float4 nodeX = g_nodesx[nodeID + startNode];
+ float4 nodeN = g_nodesn[nodeID + startNode];
+
+ // Stride should account for the float->float4 conversion
+ int positionDestination = nodeID * strideX + offsetX;
+ g_vertexBuffer[positionDestination] = nodeX.x;
+ g_vertexBuffer[positionDestination+1] = nodeX.y;
+ g_vertexBuffer[positionDestination+2] = nodeX.z;
+
+ int normalDestination = nodeID * strideN + offsetN;
+ g_vertexBuffer[normalDestination] = nodeN.x;
+ g_vertexBuffer[normalDestination+1] = nodeN.y;
+ g_vertexBuffer[normalDestination+2] = nodeN.z;
+ }
+} \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl
new file mode 100644
index 00000000..f37a2f35
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/PrepareLinks.cl
@@ -0,0 +1,34 @@
+MSTRINGIFY(
+
+__kernel void
+PrepareLinksKernel(
+ const int numLinks,
+ __global int2 * g_linksVertexIndices,
+ __global float * g_linksMassLSC,
+ __global float4 * g_nodesPreviousPosition,
+ __global float * g_linksLengthRatio,
+ __global float4 * g_linksCurrentLength)
+{
+ int linkID = get_global_id(0);
+ if( linkID < numLinks )
+ {
+ int2 nodeIndices = g_linksVertexIndices[linkID];
+ int node0 = nodeIndices.x;
+ int node1 = nodeIndices.y;
+
+ float4 nodePreviousPosition0 = g_nodesPreviousPosition[node0];
+ float4 nodePreviousPosition1 = g_nodesPreviousPosition[node1];
+
+ float massLSC = g_linksMassLSC[linkID];
+
+ float4 linkCurrentLength = nodePreviousPosition1 - nodePreviousPosition0;
+
+ float linkLengthRatio = dot(linkCurrentLength, linkCurrentLength)*massLSC;
+ linkLengthRatio = 1.0f/linkLengthRatio;
+
+ g_linksCurrentLength[linkID] = linkCurrentLength;
+ g_linksLengthRatio[linkID] = linkLengthRatio;
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl
new file mode 100644
index 00000000..9f50da8a
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocities.cl
@@ -0,0 +1,195 @@
+MSTRINGIFY(
+
+typedef struct
+{
+ int firstObject;
+ int endObject;
+} CollisionObjectIndices;
+
+typedef struct
+{
+ float4 shapeTransform[4]; // column major 4x4 matrix
+ float4 linearVelocity;
+ float4 angularVelocity;
+
+ int softBodyIdentifier;
+ int collisionShapeType;
+
+
+ // Shape information
+ // Compressed from the union
+ float radius;
+ float halfHeight;
+ int upAxis;
+
+ float margin;
+ float friction;
+
+ int padding0;
+
+} CollisionShapeDescription;
+
+/* From btBroadphaseProxy.h */
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+/* Multiply column-major matrix against vector */
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+ float4 returnVector;
+ float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+ float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+ float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+ float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+ returnVector.x = dot(row0, vector);
+ returnVector.y = dot(row1, vector);
+ returnVector.z = dot(row2, vector);
+ returnVector.w = dot(row3, vector);
+ return returnVector;
+}
+
+__kernel void
+SolveCollisionsAndUpdateVelocitiesKernel(
+ const int numNodes,
+ const float isolverdt,
+ __global int *g_vertexClothIdentifier,
+ __global float4 *g_vertexPreviousPositions,
+ __global float * g_perClothFriction,
+ __global float * g_clothDampingFactor,
+ __global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+ __global CollisionShapeDescription * g_collisionObjectDetails,
+ __global float4 * g_vertexForces,
+ __global float4 *g_vertexVelocities,
+ __global float4 *g_vertexPositions)
+{
+ int nodeID = get_global_id(0);
+ float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
+ if( get_global_id(0) < numNodes )
+ {
+ int clothIdentifier = g_vertexClothIdentifier[nodeID];
+
+ // Abort if this is not a valid cloth
+ if( clothIdentifier < 0 )
+ return;
+
+ float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
+ float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+ float3 velocity;
+ float clothFriction = g_perClothFriction[clothIdentifier];
+ float dampingFactor = g_clothDampingFactor[clothIdentifier];
+ float velocityCoefficient = (1.f - dampingFactor);
+ CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+
+ if( collisionObjectIndices.firstObject != collisionObjectIndices.endObject )
+ {
+ velocity = (float3)(15, 0, 0);
+
+ /* We have some possible collisions to deal with */
+ for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+ {
+ CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+ float colliderFriction = shapeDescription.friction;
+
+ if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+ {
+ /* Colliding with a capsule */
+
+ float capsuleHalfHeight = shapeDescription.halfHeight;
+ float capsuleRadius = shapeDescription.radius;
+ float capsuleMargin = shapeDescription.margin;
+ int capsuleupAxis = shapeDescription.upAxis;
+
+ /* Four columns of worldTransform matrix */
+ float4 worldTransform[4];
+ worldTransform[0] = shapeDescription.shapeTransform[0];
+ worldTransform[1] = shapeDescription.shapeTransform[1];
+ worldTransform[2] = shapeDescription.shapeTransform[2];
+ worldTransform[3] = shapeDescription.shapeTransform[3];
+
+ // Correctly define capsule centerline vector
+ float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
+ float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
+ c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+ c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+ c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+ c2.x = -c1.x;
+ c2.y = -c1.y;
+ c2.z = -c1.z;
+
+ float4 worldC1 = matrixVectorMul(worldTransform, c1);
+ float4 worldC2 = matrixVectorMul(worldTransform, c2);
+ float3 segment = (worldC2 - worldC1).xyz;
+
+ /* compute distance of tangent to vertex along line segment in capsule */
+ float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
+
+ float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
+ float distanceFromLine = length(position - closestPoint);
+ float distanceFromC1 = length(worldC1 - position);
+ float distanceFromC2 = length(worldC2 - position);
+
+ /* Final distance from collision, point to push from, direction to push in
+ for impulse force */
+ float dist;
+ float3 normalVector;
+ if( distanceAlongSegment < 0 )
+ {
+ dist = distanceFromC1;
+ normalVector = normalize(position - worldC1).xyz;
+ } else if( distanceAlongSegment > 1.f ) {
+ dist = distanceFromC2;
+ normalVector = normalize(position - worldC2).xyz;
+ } else {
+ dist = distanceFromLine;
+ normalVector = normalize(position - closestPoint).xyz;
+ }
+
+ float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
+ float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
+ float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
+
+ float minDistance = capsuleRadius + capsuleMargin;
+
+ /* In case of no collision, this is the value of velocity */
+ velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+
+
+ // Check for a collision
+ if( dist < minDistance )
+ {
+ /* Project back to surface along normal */
+ position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
+ velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+ float3 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+ float3 p1 = normalize(cross(normalVector, segment));
+ float3 p2 = normalize(cross(p1, normalVector));
+ /* Full friction is sum of velocities in each direction of plane */
+ float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
+
+ /* Real friction is peak friction corrected by friction coefficients */
+ frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+ float approachSpeed = dot(relativeVelocity, normalVector);
+
+ if( approachSpeed <= 0.0f )
+ forceOnVertex -= frictionVector;
+ }
+
+ }
+ }
+ } else {
+ /* Update velocity */
+ float3 difference = position.xyz - previousPosition.xyz;
+ velocity = difference*velocityCoefficient*isolverdt;
+ }
+
+ g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
+
+ /* Update external force */
+ g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
+
+ g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
+ }
+}
+
+);
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl
new file mode 100644
index 00000000..5ab2a620
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl
@@ -0,0 +1,213 @@
+MSTRINGIFY(
+
+typedef struct
+{
+ int firstObject;
+ int endObject;
+} CollisionObjectIndices;
+
+typedef struct
+{
+ float4 shapeTransform[4]; /* column major 4x4 matrix */
+ float4 linearVelocity;
+ float4 angularVelocity;
+
+ int softBodyIdentifier;
+ int collisionShapeType;
+
+
+ // Shape information
+ // Compressed from the union
+ float radius;
+ float halfHeight;
+ int upAxis;
+
+ float margin;
+ float friction;
+
+ int padding0;
+
+} CollisionShapeDescription;
+
+/* From btBroadphaseProxy.h */
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+
+/* Multiply column-major matrix against vector */
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+ float4 returnVector;
+ float4 row0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+ float4 row1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+ float4 row2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+ float4 row3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+ returnVector.x = dot(row0, vector);
+ returnVector.y = dot(row1, vector);
+ returnVector.z = dot(row2, vector);
+ returnVector.w = dot(row3, vector);
+ return returnVector;
+}
+
+__kernel void
+SolveCollisionsAndUpdateVelocitiesKernel(
+ const int numNodes,
+ const float isolverdt,
+ __global int *g_vertexClothIdentifier,
+ __global float4 *g_vertexPreviousPositions,
+ __global float * g_perClothFriction,
+ __global float * g_clothDampingFactor,
+ __global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+ __global CollisionShapeDescription * g_collisionObjectDetails,
+ __global float4 * g_vertexForces,
+ __global float4 *g_vertexVelocities,
+ __global float4 *g_vertexPositions,
+ __local CollisionShapeDescription *localCollisionShapes)
+{
+ int nodeID = get_global_id(0);
+ float3 forceOnVertex = (float3)(0.f, 0.f, 0.f);
+
+ int clothIdentifier = g_vertexClothIdentifier[nodeID];
+
+ // Abort if this is not a valid cloth
+ if( clothIdentifier < 0 )
+ return;
+
+ float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
+ float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+ float3 velocity;
+ float clothFriction = g_perClothFriction[clothIdentifier];
+ float dampingFactor = g_clothDampingFactor[clothIdentifier];
+ float velocityCoefficient = (1.f - dampingFactor);
+ CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+
+ int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+ if( numObjects > 0 )
+ {
+ /* We have some possible collisions to deal with */
+
+ /* First load all of the collision objects into LDS */
+ int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+ if( get_local_id(0) < numObjects )
+ {
+ localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
+ }
+ }
+
+ /* Safe as the vertices are padded so that not more than one soft body is in a group */
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ /* Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this */
+ if( numObjects > 0 )
+ {
+ velocity = (float3)(0, 0, 0);
+
+
+ // We have some possible collisions to deal with
+ for( int collision = 0; collision < numObjects; ++collision )
+ {
+ CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
+ float colliderFriction = shapeDescription.friction;
+
+ if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+ {
+ /* Colliding with a capsule */
+
+ float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
+ float capsuleRadius = localCollisionShapes[collision].radius;
+ float capsuleMargin = localCollisionShapes[collision].margin;
+ int capsuleupAxis = localCollisionShapes[collision].upAxis;
+
+ float4 worldTransform[4];
+ worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
+ worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
+ worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
+ worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
+
+ // Correctly define capsule centerline vector
+ float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f);
+ float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
+ c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+ c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+ c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+ c2.x = -c1.x;
+ c2.y = -c1.y;
+ c2.z = -c1.z;
+
+ float4 worldC1 = matrixVectorMul(worldTransform, c1);
+ float4 worldC2 = matrixVectorMul(worldTransform, c2);
+ float3 segment = (worldC2 - worldC1).xyz;
+
+
+ /* compute distance of tangent to vertex along line segment in capsule */
+ float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
+
+ float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment, 0.f));
+ float distanceFromLine = length(position - closestPoint);
+ float distanceFromC1 = length(worldC1 - position);
+ float distanceFromC2 = length(worldC2 - position);
+
+ /* Final distance from collision, point to push from, direction to push in
+ for impulse force */
+ float dist;
+ float3 normalVector;
+ if( distanceAlongSegment < 0 )
+ {
+ dist = distanceFromC1;
+ normalVector = normalize(position - worldC1).xyz;
+ } else if( distanceAlongSegment > 1.f ) {
+ dist = distanceFromC2;
+ normalVector = normalize(position - worldC2).xyz;
+ } else {
+ dist = distanceFromLine;
+ normalVector = normalize(position - closestPoint).xyz;
+ }
+
+ float3 colliderLinearVelocity = localCollisionShapes[collision].linearVelocity.xyz;
+ float3 colliderAngularVelocity = localCollisionShapes[collision].angularVelocity.xyz;
+ float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - (float3)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w));
+
+ float minDistance = capsuleRadius + capsuleMargin;
+
+ /* In case of no collision, this is the value of velocity */
+ velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+
+
+ /* Check for a collision */
+ if( dist < minDistance )
+ {
+ /* Project back to surface along normal */
+ position = position + (float4)((minDistance - dist)*normalVector*0.9f, 0.f);
+ velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+ float3 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+ float3 p1 = normalize(cross(normalVector, segment));
+ float3 p2 = normalize(cross(p1, normalVector));
+ /* Full friction is sum of velocities in each direction of plane */
+ float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
+
+ /* Real friction is peak friction corrected by friction coefficients */
+ frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+ float approachSpeed = dot(relativeVelocity, normalVector);
+
+ if( approachSpeed <= 0.0f )
+ forceOnVertex -= frictionVector;
+ }
+
+ }
+ }
+ } else {
+ /* Update velocity */
+ float3 difference = position.xyz - previousPosition.xyz;
+ velocity = difference*velocityCoefficient*isolverdt;
+ }
+
+ g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
+
+ /* Update external force */
+ g_vertexForces[nodeID] = (float4)(forceOnVertex, 0.f);
+
+ g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
+}
+
+);
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl
new file mode 100644
index 00000000..4a08a56c
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositions.cl
@@ -0,0 +1,55 @@
+
+MSTRINGIFY(
+
+/*#define float3 float4
+
+float dot3(float3 a, float3 b)
+{
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+}*/
+
+__kernel void
+SolvePositionsFromLinksKernel(
+ const int startLink,
+ const int numLinks,
+ const float kst,
+ const float ti,
+ __global int2 * g_linksVertexIndices,
+ __global float * g_linksMassLSC,
+ __global float * g_linksRestLengthSquared,
+ __global float * g_verticesInverseMass,
+ __global float4 * g_vertexPositions)
+
+{
+ int linkID = get_global_id(0) + startLink;
+ if( get_global_id(0) < numLinks )
+ {
+ float massLSC = g_linksMassLSC[linkID];
+ float restLengthSquared = g_linksRestLengthSquared[linkID];
+
+ if( massLSC > 0.0f )
+ {
+ int2 nodeIndices = g_linksVertexIndices[linkID];
+ int node0 = nodeIndices.x;
+ int node1 = nodeIndices.y;
+
+ float3 position0 = g_vertexPositions[node0].xyz;
+ float3 position1 = g_vertexPositions[node1].xyz;
+
+ float inverseMass0 = g_verticesInverseMass[node0];
+ float inverseMass1 = g_verticesInverseMass[node1];
+
+ float3 del = position1 - position0;
+ float len = dot(del, del);
+ float k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+ position0 = position0 - del*(k*inverseMass0);
+ position1 = position1 + del*(k*inverseMass1);
+
+ g_vertexPositions[node0] = (float4)(position0, 0.f);
+ g_vertexPositions[node1] = (float4)(position1, 0.f);
+
+ }
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl
new file mode 100644
index 00000000..9a45570a
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/SolvePositionsSIMDBatched.cl
@@ -0,0 +1,129 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+MSTRINGIFY(
+
+float mydot3(float4 a, float4 b)
+{
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+__kernel void
+SolvePositionsFromLinksKernel(
+ const int startWaveInBatch,
+ const int numWaves,
+ const float kst,
+ const float ti,
+ __global int2 *g_wavefrontBatchCountsVertexCounts,
+ __global int *g_vertexAddressesPerWavefront,
+ __global int2 * g_linksVertexIndices,
+ __global float * g_linksMassLSC,
+ __global float * g_linksRestLengthSquared,
+ __global float * g_verticesInverseMass,
+ __global float4 * g_vertexPositions,
+ __local int2 *wavefrontBatchCountsVertexCounts,
+ __local float4 *vertexPositionSharedData,
+ __local float *vertexInverseMassSharedData)
+{
+ const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1));
+ const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
+ const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
+ const int localWavefront = wavefront - firstWavefrontInBlock;
+
+ // Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier
+ if( wavefront < (startWaveInBatch + numWaves) )
+ {
+ // Load the batch counts for the wavefronts
+
+ int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
+ int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
+ int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
+
+ // Load the vertices for the wavefronts
+ for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+ {
+ int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+ vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
+ vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
+ }
+
+ mem_fence(CLK_LOCAL_MEM_FENCE);
+
+ // Loop through the batches performing the solve on each in LDS
+ int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;
+
+ //for( int batch = 0; batch < batchesWithinWavefront; ++batch )
+
+ int batch = 0;
+ do
+ {
+ int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
+ int locationOfValue = baseDataLocation + laneInWavefront;
+
+
+ // These loads should all be perfectly linear across the WF
+ int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
+ float massLSC = g_linksMassLSC[locationOfValue];
+ float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
+
+ // LDS vertex addresses based on logical wavefront number in block and loaded index
+ int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
+ int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
+
+ float4 position0 = vertexPositionSharedData[vertexAddress0];
+ float4 position1 = vertexPositionSharedData[vertexAddress1];
+
+ float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
+ float inverseMass1 = vertexInverseMassSharedData[vertexAddress1];
+
+ float4 del = position1 - position0;
+ float len = mydot3(del, del);
+
+ float k = 0;
+ if( massLSC > 0.0f )
+ {
+ k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+ }
+
+ position0 = position0 - del*(k*inverseMass0);
+ position1 = position1 + del*(k*inverseMass1);
+
+ // Ensure compiler does not re-order memory operations
+ mem_fence(CLK_LOCAL_MEM_FENCE);
+
+ vertexPositionSharedData[vertexAddress0] = position0;
+ vertexPositionSharedData[vertexAddress1] = position1;
+
+ // Ensure compiler does not re-order memory operations
+ mem_fence(CLK_LOCAL_MEM_FENCE);
+
+
+ ++batch;
+ } while( batch < batchesWithinWavefront );
+
+ // Update the global memory vertices for the wavefronts
+ for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+ {
+ int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+ g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
+ }
+
+ }
+
+}
+
+);
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl
new file mode 100644
index 00000000..488a5847
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateConstants.cl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+/*#define float3 float4
+
+float dot3(float3 a, float3 b)
+{
+ return a.x*b.x + a.y*b.y + a.z*b.z;
+}*/
+
+__kernel void
+UpdateConstantsKernel(
+ const int numLinks,
+ __global int2 * g_linksVertexIndices,
+ __global float4 * g_vertexPositions,
+ __global float * g_vertexInverseMasses,
+ __global float * g_linksMaterialLSC,
+ __global float * g_linksMassLSC,
+ __global float * g_linksRestLengthSquared,
+ __global float * g_linksRestLengths)
+{
+ int linkID = get_global_id(0);
+ if( linkID < numLinks )
+ {
+ int2 nodeIndices = g_linksVertexIndices[linkID];
+ int node0 = nodeIndices.x;
+ int node1 = nodeIndices.y;
+ float linearStiffnessCoefficient = g_linksMaterialLSC[ linkID ];
+
+ float3 position0 = g_vertexPositions[node0].xyz;
+ float3 position1 = g_vertexPositions[node1].xyz;
+ float inverseMass0 = g_vertexInverseMasses[node0];
+ float inverseMass1 = g_vertexInverseMasses[node1];
+
+ float3 difference = position0 - position1;
+ float length2 = dot(difference, difference);
+ float length = sqrt(length2);
+
+ g_linksRestLengths[linkID] = length;
+ g_linksMassLSC[linkID] = (inverseMass0 + inverseMass1)/linearStiffnessCoefficient;
+ g_linksRestLengthSquared[linkID] = length*length;
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl
new file mode 100644
index 00000000..cad4b8ad
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNodes.cl
@@ -0,0 +1,40 @@
+MSTRINGIFY(
+
+//#define float3 float4
+
+__kernel void
+updateVelocitiesFromPositionsWithVelocitiesKernel(
+ int numNodes,
+ float isolverdt,
+ __global float4 * g_vertexPositions,
+ __global float4 * g_vertexPreviousPositions,
+ __global int * g_vertexClothIndices,
+ __global float *g_clothVelocityCorrectionCoefficients,
+ __global float * g_clothDampingFactor,
+ __global float4 * g_vertexVelocities,
+ __global float4 * g_vertexForces)
+{
+ int nodeID = get_global_id(0);
+ if( nodeID < numNodes )
+ {
+ float3 position = g_vertexPositions[nodeID].xyz;
+ float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
+ float3 velocity = g_vertexVelocities[nodeID].xyz;
+ int clothIndex = g_vertexClothIndices[nodeID];
+ float velocityCorrectionCoefficient = g_clothVelocityCorrectionCoefficients[clothIndex];
+ float dampingFactor = g_clothDampingFactor[clothIndex];
+ float velocityCoefficient = (1.f - dampingFactor);
+
+ float3 difference = position - previousPosition;
+
+ velocity += difference*velocityCorrectionCoefficient*isolverdt;
+
+ // Damp the velocity
+ velocity *= velocityCoefficient;
+
+ g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
+ g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl
new file mode 100644
index 00000000..37c8b3fa
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdateNormals.cl
@@ -0,0 +1,103 @@
+MSTRINGIFY(
+
+//#define float3 float4
+
+/*float length3(float3 a)
+{
+ a.w = 0;
+ return length(a);
+}
+
+float normalize3(float3 a)
+{
+ a.w = 0;
+ return normalize(a);
+}*/
+
+__kernel void
+ResetNormalsAndAreasKernel(
+ const unsigned int numNodes,
+ __global float4 * g_vertexNormals,
+ __global float * g_vertexArea)
+{
+ if( get_global_id(0) < numNodes )
+ {
+ g_vertexNormals[get_global_id(0)] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+ g_vertexArea[get_global_id(0)] = 0.0f;
+ }
+}
+
+__kernel void
+UpdateSoftBodiesKernel(
+ const unsigned int startFace,
+ const unsigned int numFaces,
+ __global int4 * g_triangleVertexIndexSet,
+ __global float4 * g_vertexPositions,
+ __global float4 * g_vertexNormals,
+ __global float * g_vertexArea,
+ __global float4 * g_triangleNormals,
+ __global float * g_triangleArea)
+{
+ int faceID = get_global_id(0) + startFace;
+ if( get_global_id(0) < numFaces )
+ {
+ int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
+ int nodeIndex0 = triangleIndexSet.x;
+ int nodeIndex1 = triangleIndexSet.y;
+ int nodeIndex2 = triangleIndexSet.z;
+
+ float3 node0 = g_vertexPositions[nodeIndex0].xyz;
+ float3 node1 = g_vertexPositions[nodeIndex1].xyz;
+ float3 node2 = g_vertexPositions[nodeIndex2].xyz;
+ float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
+ float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
+ float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
+ float vertexArea0 = g_vertexArea[nodeIndex0];
+ float vertexArea1 = g_vertexArea[nodeIndex1];
+ float vertexArea2 = g_vertexArea[nodeIndex2];
+
+ float3 vector0 = node1 - node0;
+ float3 vector1 = node2 - node0;
+
+ float3 faceNormal = cross(vector0.xyz, vector1.xyz);
+ float triangleArea = length(faceNormal);
+
+ nodeNormal0 = nodeNormal0 + faceNormal;
+ nodeNormal1 = nodeNormal1 + faceNormal;
+ nodeNormal2 = nodeNormal2 + faceNormal;
+ vertexArea0 = vertexArea0 + triangleArea;
+ vertexArea1 = vertexArea1 + triangleArea;
+ vertexArea2 = vertexArea2 + triangleArea;
+
+ g_triangleNormals[faceID] = (float4)(normalize(faceNormal), 0.f);
+ g_vertexNormals[nodeIndex0] = (float4)(nodeNormal0, 0.f);
+ g_vertexNormals[nodeIndex1] = (float4)(nodeNormal1, 0.f);
+ g_vertexNormals[nodeIndex2] = (float4)(nodeNormal2, 0.f);
+ g_triangleArea[faceID] = triangleArea;
+ g_vertexArea[nodeIndex0] = vertexArea0;
+ g_vertexArea[nodeIndex1] = vertexArea1;
+ g_vertexArea[nodeIndex2] = vertexArea2;
+ }
+}
+
+__kernel void
+NormalizeNormalsAndAreasKernel(
+ const unsigned int numNodes,
+ __global int * g_vertexTriangleCount,
+ __global float4 * g_vertexNormals,
+ __global float * g_vertexArea)
+{
+ if( get_global_id(0) < numNodes )
+ {
+ float4 normal = g_vertexNormals[get_global_id(0)];
+ float area = g_vertexArea[get_global_id(0)];
+ int numTriangles = g_vertexTriangleCount[get_global_id(0)];
+
+ float vectorLength = length(normal);
+
+ g_vertexNormals[get_global_id(0)] = normalize(normal);
+ g_vertexArea[get_global_id(0)] = area/(float)(numTriangles);
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl
new file mode 100644
index 00000000..ae7599a8
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositions.cl
@@ -0,0 +1,36 @@
+MSTRINGIFY(
+
+//#define float3 float4
+
+__kernel void
+updateVelocitiesFromPositionsWithoutVelocitiesKernel(
+ const int numNodes,
+ const float isolverdt,
+ __global float4 * g_vertexPositions,
+ __global float4 * g_vertexPreviousPositions,
+ __global int * g_vertexClothIndices,
+ __global float * g_clothDampingFactor,
+ __global float4 * g_vertexVelocities,
+ __global float4 * g_vertexForces)
+
+{
+ int nodeID = get_global_id(0);
+ if( nodeID < numNodes )
+ {
+ float3 position = g_vertexPositions[nodeID].xyz;
+ float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
+ float3 velocity = g_vertexVelocities[nodeID].xyz;
+ int clothIndex = g_vertexClothIndices[nodeID];
+ float dampingFactor = g_clothDampingFactor[clothIndex];
+ float velocityCoefficient = (1.f - dampingFactor);
+
+ float3 difference = position - previousPosition;
+
+ velocity = difference*velocityCoefficient*isolverdt;
+
+ g_vertexVelocities[nodeID] = (float4)(velocity, 0.f);
+ g_vertexForces[nodeID] = (float4)(0.f, 0.f, 0.f, 0.f);
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl
new file mode 100644
index 00000000..a3c94518
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/UpdatePositionsFromVelocities.cl
@@ -0,0 +1,26 @@
+MSTRINGIFY(
+
+//#define float3 float4
+
+__kernel void
+UpdatePositionsFromVelocitiesKernel(
+ const int numNodes,
+ const float solverSDT,
+ __global float4 * g_vertexVelocities,
+ __global float4 * g_vertexPreviousPositions,
+ __global float4 * g_vertexCurrentPosition)
+{
+ int vertexID = get_global_id(0);
+ if( vertexID < numNodes )
+ {
+ float3 previousPosition = g_vertexPreviousPositions[vertexID].xyz;
+ float3 velocity = g_vertexVelocities[vertexID].xyz;
+
+ float3 newPosition = previousPosition + velocity*solverSDT;
+
+ g_vertexCurrentPosition[vertexID] = (float4)(newPosition, 0.f);
+ g_vertexPreviousPositions[vertexID] = (float4)(newPosition, 0.f);
+ }
+}
+
+); \ No newline at end of file
diff --git a/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl
new file mode 100644
index 00000000..b7345e37
--- /dev/null
+++ b/tests/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC/VSolveLinks.cl
@@ -0,0 +1,45 @@
+MSTRINGIFY(
+
+__kernel void
+VSolveLinksKernel(
+ int startLink,
+ int numLinks,
+ float kst,
+ __global int2 * g_linksVertexIndices,
+ __global float * g_linksLengthRatio,
+ __global float4 * g_linksCurrentLength,
+ __global float * g_vertexInverseMass,
+ __global float4 * g_vertexVelocity)
+{
+ int linkID = get_global_id(0) + startLink;
+ if( get_global_id(0) < numLinks )
+ {
+ int2 nodeIndices = g_linksVertexIndices[linkID];
+ int node0 = nodeIndices.x;
+ int node1 = nodeIndices.y;
+
+ float linkLengthRatio = g_linksLengthRatio[linkID];
+ float3 linkCurrentLength = g_linksCurrentLength[linkID].xyz;
+
+ float3 vertexVelocity0 = g_vertexVelocity[node0].xyz;
+ float3 vertexVelocity1 = g_vertexVelocity[node1].xyz;
+
+ float vertexInverseMass0 = g_vertexInverseMass[node0];
+ float vertexInverseMass1 = g_vertexInverseMass[node1];
+
+ float3 nodeDifference = vertexVelocity0 - vertexVelocity1;
+ float dotResult = dot(linkCurrentLength, nodeDifference);
+ float j = -dotResult*linkLengthRatio*kst;
+
+ float3 velocityChange0 = linkCurrentLength*(j*vertexInverseMass0);
+ float3 velocityChange1 = linkCurrentLength*(j*vertexInverseMass1);
+
+ vertexVelocity0 += velocityChange0;
+ vertexVelocity1 -= velocityChange1;
+
+ g_vertexVelocity[node0] = (float4)(vertexVelocity0, 0.f);
+ g_vertexVelocity[node1] = (float4)(vertexVelocity1, 0.f);
+ }
+}
+
+); \ No newline at end of file