mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-04 07:31:16 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			128 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			128 lines
		
	
	
	
		
			2.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
 | 
						|
#ifndef B3_LAUNCHER_CL_H
 | 
						|
#define B3_LAUNCHER_CL_H
 | 
						|
 | 
						|
#include "b3BufferInfoCL.h"
 | 
						|
#include "Bullet3Common/b3MinMax.h"
 | 
						|
#include "b3OpenCLArray.h"
 | 
						|
#include <stdio.h>
 | 
						|
 | 
						|
#define B3_DEBUG_SERIALIZE_CL
 | 
						|
 | 
						|
#ifdef _WIN32
 | 
						|
#pragma warning(disable : 4996)
 | 
						|
#endif
 | 
						|
#define B3_CL_MAX_ARG_SIZE 16
 | 
						|
B3_ATTRIBUTE_ALIGNED16(struct)
 | 
						|
b3KernelArgData
 | 
						|
{
 | 
						|
	int m_isBuffer;
 | 
						|
	int m_argIndex;
 | 
						|
	int m_argSizeInBytes;
 | 
						|
	int m_unusedPadding;
 | 
						|
	union {
 | 
						|
		cl_mem m_clBuffer;
 | 
						|
		unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
 | 
						|
	};
 | 
						|
};
 | 
						|
 | 
						|
class b3LauncherCL
 | 
						|
{
 | 
						|
	cl_command_queue m_commandQueue;
 | 
						|
	cl_kernel m_kernel;
 | 
						|
	int m_idx;
 | 
						|
 | 
						|
	b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
 | 
						|
	int m_serializationSizeInBytes;
 | 
						|
	bool m_enableSerialization;
 | 
						|
 | 
						|
	const char* m_name;
 | 
						|
 | 
						|
public:
 | 
						|
	b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays;
 | 
						|
 | 
						|
	b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
 | 
						|
 | 
						|
	virtual ~b3LauncherCL();
 | 
						|
 | 
						|
	void setBuffer(cl_mem clBuffer);
 | 
						|
 | 
						|
	void setBuffers(b3BufferInfoCL* buffInfo, int n);
 | 
						|
 | 
						|
	int getSerializationBufferSize() const
 | 
						|
	{
 | 
						|
		return m_serializationSizeInBytes;
 | 
						|
	}
 | 
						|
 | 
						|
	int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
 | 
						|
 | 
						|
	inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
 | 
						|
 | 
						|
	int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
 | 
						|
 | 
						|
	int getNumArguments() const
 | 
						|
	{
 | 
						|
		return m_kernelArguments.size();
 | 
						|
	}
 | 
						|
 | 
						|
	b3KernelArgData getArgument(int index)
 | 
						|
	{
 | 
						|
		return m_kernelArguments[index];
 | 
						|
	}
 | 
						|
 | 
						|
	void serializeToFile(const char* fileName, int numWorkItems);
 | 
						|
 | 
						|
	template <typename T>
 | 
						|
	inline void setConst(const T& consts)
 | 
						|
	{
 | 
						|
		int sz = sizeof(T);
 | 
						|
		b3Assert(sz <= B3_CL_MAX_ARG_SIZE);
 | 
						|
 | 
						|
		if (m_enableSerialization)
 | 
						|
		{
 | 
						|
			b3KernelArgData kernelArg;
 | 
						|
			kernelArg.m_argIndex = m_idx;
 | 
						|
			kernelArg.m_isBuffer = 0;
 | 
						|
			T* destArg = (T*)kernelArg.m_argData;
 | 
						|
			*destArg = consts;
 | 
						|
			kernelArg.m_argSizeInBytes = sizeof(T);
 | 
						|
			m_kernelArguments.push_back(kernelArg);
 | 
						|
			m_serializationSizeInBytes += sizeof(b3KernelArgData);
 | 
						|
		}
 | 
						|
 | 
						|
		cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts);
 | 
						|
		b3Assert(status == CL_SUCCESS);
 | 
						|
	}
 | 
						|
 | 
						|
	inline void launch1D(int numThreads, int localSize = 64)
 | 
						|
	{
 | 
						|
		launch2D(numThreads, 1, localSize, 1);
 | 
						|
	}
 | 
						|
 | 
						|
	inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY)
 | 
						|
	{
 | 
						|
		size_t gRange[3] = {1, 1, 1};
 | 
						|
		size_t lRange[3] = {1, 1, 1};
 | 
						|
		lRange[0] = localSizeX;
 | 
						|
		lRange[1] = localSizeY;
 | 
						|
		gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1));
 | 
						|
		gRange[0] *= lRange[0];
 | 
						|
		gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1));
 | 
						|
		gRange[1] *= lRange[1];
 | 
						|
 | 
						|
		cl_int status = clEnqueueNDRangeKernel(m_commandQueue,
 | 
						|
											   m_kernel, 2, NULL, gRange, lRange, 0, 0, 0);
 | 
						|
		if (status != CL_SUCCESS)
 | 
						|
		{
 | 
						|
			printf("Error: OpenCL status = %d\n", status);
 | 
						|
		}
 | 
						|
		b3Assert(status == CL_SUCCESS);
 | 
						|
	}
 | 
						|
 | 
						|
	void enableSerialization(bool serialize)
 | 
						|
	{
 | 
						|
		m_enableSerialization = serialize;
 | 
						|
	}
 | 
						|
};
 | 
						|
 | 
						|
#endif  //B3_LAUNCHER_CL_H
 |