mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-04 07:31:16 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			203 lines
		
	
	
	
		
			6 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			203 lines
		
	
	
	
		
			6 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Takahiro Harada
//Host-code rewritten by Erwin Coumans
 | 
						|
 | 
						|
#define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl"
 | 
						|
#define KERNEL0 "SearchSortDataLowerKernel"
 | 
						|
#define KERNEL1 "SearchSortDataUpperKernel"
 | 
						|
#define KERNEL2 "SubtractKernel"
 | 
						|
 | 
						|
#include "b3BoundSearchCL.h"
 | 
						|
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
 | 
						|
#include "b3LauncherCL.h"
 | 
						|
#include "kernels/BoundSearchKernelsCL.h"
 | 
						|
 | 
						|
/// Compiles the bound-search OpenCL program, creates its kernels and allocates
/// the helper objects used by execute().
///
/// @param ctx      OpenCL context used for program compilation and buffer allocation.
/// @param device   OpenCL device the program is compiled for.
/// @param queue    Command queue stored in m_queue and used for later kernel launches.
/// @param maxSize  Element count of the internal lower/upper scratch buffers. When 0,
///                 neither the scratch buffers nor the subtract kernel are created, so
///                 the COUNT option of execute() cannot be used on this instance.
b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
	: m_context(ctx),
	  m_device(device),
	  m_queue(queue)
{
	const char* additionalMacros = "";

	cl_int pErrNum;
	const char* kernelSource = boundSearchKernelsCL;

	cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, BOUNDSEARCH_PATH);
	b3Assert(boundSearchProg);

	m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg, additionalMacros);
	b3Assert(m_lowerSortDataKernel);

	m_upperSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg, additionalMacros);
	b3Assert(m_upperSortDataKernel);

	// The subtract kernel is only needed for COUNT, which in turn needs the scratch buffers.
	m_subtractKernel = 0;
	if (maxSize)
	{
		m_subtractKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg, additionalMacros);
		b3Assert(m_subtractKernel);
	}

	// Drop our reference to the program now that every kernel has been created.
	// OpenCL kernel objects retain the program they were created from, so the
	// program stays alive until the last kernel is released; without this call
	// the program reference would leak for every b3BoundSearchCL instance.
	if (boundSearchProg)
	{
		clReleaseProgram(boundSearchProg);
	}

	m_lower = (maxSize == 0) ? 0 : new b3OpenCLArray<unsigned int>(ctx, queue, maxSize);
	m_upper = (maxSize == 0) ? 0 : new b3OpenCLArray<unsigned int>(ctx, queue, maxSize);

	m_filler = new b3FillCL(ctx, device, queue);
}
 | 
						|
 | 
						|
/// Frees the scratch buffers and the fill helper, then releases the OpenCL kernels.
b3BoundSearchCL::~b3BoundSearchCL()
{
	// m_lower / m_upper are null when the object was built with maxSize == 0;
	// deleting a null pointer is a no-op.
	delete m_lower;
	delete m_upper;
	delete m_filler;

	clReleaseKernel(m_lowerSortDataKernel);
	clReleaseKernel(m_upperSortDataKernel);

	// The subtract kernel is left at 0 when the object was built with maxSize == 0,
	// so only release it when it was actually created.
	if (m_subtractKernel)
	{
		clReleaseKernel(m_subtractKernel);
	}
}
 | 
						|
 | 
						|
/// Runs a bound search on the device.
///
/// @param src     Device array of sort data to search.
///                NOTE(review): presumably must be sorted by key, as executeHost() asserts - confirm.
/// @param nSrc    Number of elements of src to process.
/// @param dst     Device array receiving the result.
/// @param nDst    Number of result entries; for COUNT it must not exceed the maxSize
///                given to the constructor.
/// @param option  BOUND_LOWER / BOUND_UPPER launch the matching search kernel writing to dst;
///                COUNT computes both bounds into the internal scratch buffers and then
///                launches the subtract kernel to combine them into dst.
void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option)
{
	if (option == BOUND_LOWER || option == BOUND_UPPER)
	{
		// Both searches share the same argument layout; only the kernel differs.
		const bool isLower = (option == BOUND_LOWER);
		cl_kernel searchKernel = isLower ? m_lowerSortDataKernel : m_upperSortDataKernel;
		const char* searchKernelName = isLower ? "m_lowerSortDataKernel" : "m_upperSortDataKernel";

		b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())};

		b3LauncherCL launcher(m_queue, searchKernel, searchKernelName);
		launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
		launcher.setConst(nSrc);
		launcher.setConst(nDst);

		launcher.launch1D(nSrc, 64);
	}
	else if (option == COUNT)
	{
		// COUNT needs the scratch buffers and the subtract kernel, which only
		// exist when the object was constructed with a non-zero maxSize.
		b3Assert(m_lower);
		b3Assert(m_upper);
		b3Assert(m_subtractKernel);
		// nDst entries of each scratch buffer are filled and written below, so the
		// buffers must hold at least nDst elements.
		b3Assert(m_lower->capacity() >= (int)nDst);
		b3Assert(m_upper->capacity() >= (int)nDst);

		// Clear both scratch buffers before the searches write into them.
		int zero = 0;
		m_filler->execute(*m_lower, zero, nDst);
		m_filler->execute(*m_upper, zero, nDst);

		execute(src, nSrc, *m_lower, nDst, BOUND_LOWER);
		execute(src, nSrc, *m_upper, nDst, BOUND_UPPER);

		{
			b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_upper->getBufferCL(), true), b3BufferInfoCL(m_lower->getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())};

			b3LauncherCL launcher(m_queue, m_subtractKernel, "m_subtractKernel");
			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
			launcher.setConst(nSrc);
			launcher.setConst(nDst);

			// One work item per destination entry.
			launcher.launch1D(nDst, 64);
		}
	}
	else
	{
		// Unknown option.
		b3Assert(0);
	}
}
 | 
						|
 | 
						|
/// CPU implementation of the bound search.
///
/// @param src     Sort data to search; asserted (debug builds) to be sorted by ascending m_key.
/// @param nSrc    Number of elements of src to process.
/// @param dst     Result array, indexed by key value.
/// @param nDst    Number of result entries (used as the sentinel key and as the COUNT length).
/// @param option  BOUND_LOWER: dst[key] = index of the first element holding that key.
///                BOUND_UPPER: dst[key] = one past the index of the last element holding that key.
///                COUNT:       dst[i] = upper[i] - lower[i] for i in [0, nDst), computed from
///                             zero-initialised temporary lower/upper arrays.
///                For BOUND_LOWER / BOUND_UPPER, entries whose key does not occur in src
///                are left untouched.
void b3BoundSearchCL::executeHost(b3AlignedObjectArray<b3SortData>& src, int nSrc,
								  b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option)
{
	// Precondition: keys are in non-decreasing order.
	for (int i = 0; i < nSrc - 1; i++)
		b3Assert(src[i].m_key <= src[i + 1].m_key);

	// Sentinels standing in for the elements just before the first and just
	// after the last entry of src.
	b3SortData minData, maxData;
	minData.m_key = -1;
	minData.m_value = -1;
	maxData.m_key = nDst;
	maxData.m_value = nDst;

	if (option == BOUND_LOWER)
	{
		for (int i = 0; i < nSrc; i++)
		{
			b3SortData& prevData = (i == 0) ? minData : src[i - 1];
			b3SortData& currData = src[i];

			// A key change between neighbours marks the first occurrence of the new key.
			if (prevData.m_key != currData.m_key)
			{
				int k = currData.m_key;
				dst[k] = i;
			}
		}
	}
	else if (option == BOUND_UPPER)
	{
		for (int i = 1; i < nSrc + 1; i++)
		{
			b3SortData& prevData = src[i - 1];
			b3SortData& currData = (i == nSrc) ? maxData : src[i];

			// A key change between neighbours marks the end of the previous key's run.
			if (prevData.m_key != currData.m_key)
			{
				int k = prevData.m_key;
				dst[k] = i;
			}
		}
	}
	else if (option == COUNT)
	{
		b3AlignedObjectArray<unsigned int> lower;
		lower.resize(nDst);
		b3AlignedObjectArray<unsigned int> upper;
		upper.resize(nDst);

		// Keys that never occur keep lower == upper == 0 and therefore count 0.
		for (int i = 0; i < nDst; i++)
		{
			lower[i] = upper[i] = 0;
		}

		executeHost(src, nSrc, lower, nDst, BOUND_LOWER);
		executeHost(src, nSrc, upper, nDst, BOUND_UPPER);

		for (int i = 0; i < nDst; i++)
		{
			dst[i] = upper[i] - lower[i];
		}
	}
	else
	{
		// Unknown option.
		b3Assert(0);
	}
}
 |