// Mirror of https://github.com/godotengine/godot.git (synced 2025-11-04 07:31:16 +00:00).
// Listing metadata from the mirror page: 84 lines, 2 KiB, C++.
 | 
						|
#ifndef B3_RADIXSORT32_H
 | 
						|
#define B3_RADIXSORT32_H
 | 
						|
 | 
						|
#include "b3OpenCLArray.h"
 | 
						|
 | 
						|
/// A pair of 32-bit unsigned values handled as one element by the sorter.
///
/// Each field lives in an anonymous union so it can be reached under two
/// names: `m_key` / `x` share the first word and `m_value` / `y` share the
/// second. The struct therefore holds exactly two `unsigned int` members.
struct b3SortData
{
	// First word: `m_key` and `x` name the same storage.
	union {
		unsigned int m_key;
		unsigned int x;
	};

	// Second word: `m_value` and `y` name the same storage.
	union {
		unsigned int m_value;
		unsigned int y;
	};
};
 | 
						|
#include "b3BufferInfoCL.h"
 | 
						|
 | 
						|
class b3RadixSort32CL
 | 
						|
{
 | 
						|
	b3OpenCLArray<unsigned int>* m_workBuffer1;
 | 
						|
	b3OpenCLArray<unsigned int>* m_workBuffer2;
 | 
						|
 | 
						|
	b3OpenCLArray<b3SortData>* m_workBuffer3;
 | 
						|
	b3OpenCLArray<b3SortData>* m_workBuffer4;
 | 
						|
 | 
						|
	b3OpenCLArray<unsigned int>* m_workBuffer3a;
 | 
						|
	b3OpenCLArray<unsigned int>* m_workBuffer4a;
 | 
						|
 | 
						|
	cl_command_queue m_commandQueue;
 | 
						|
 | 
						|
	cl_kernel m_streamCountSortDataKernel;
 | 
						|
	cl_kernel m_streamCountKernel;
 | 
						|
 | 
						|
	cl_kernel m_prefixScanKernel;
 | 
						|
	cl_kernel m_sortAndScatterSortDataKernel;
 | 
						|
	cl_kernel m_sortAndScatterKernel;
 | 
						|
 | 
						|
	bool m_deviceCPU;
 | 
						|
 | 
						|
	class b3PrefixScanCL* m_scan;
 | 
						|
	class b3FillCL* m_fill;
 | 
						|
 | 
						|
public:
 | 
						|
	struct b3ConstData
 | 
						|
	{
 | 
						|
		int m_n;
 | 
						|
		int m_nWGs;
 | 
						|
		int m_startBit;
 | 
						|
		int m_nBlocksPerWG;
 | 
						|
	};
 | 
						|
	enum
 | 
						|
	{
 | 
						|
		DATA_ALIGNMENT = 256,
 | 
						|
		WG_SIZE = 64,
 | 
						|
		BLOCK_SIZE = 256,
 | 
						|
		ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE / WG_SIZE),
 | 
						|
		BITS_PER_PASS = 4,
 | 
						|
		NUM_BUCKET = (1 << BITS_PER_PASS),
 | 
						|
		//	if you change this, change nPerWI in kernel as well
 | 
						|
		NUM_WGS = 20 * 6,  //	cypress
 | 
						|
						   //			NUM_WGS = 24*6,	//	cayman
 | 
						|
						   //			NUM_WGS = 32*4,	//	nv
 | 
						|
	};
 | 
						|
 | 
						|
private:
 | 
						|
public:
 | 
						|
	b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity = 0);
 | 
						|
 | 
						|
	virtual ~b3RadixSort32CL();
 | 
						|
 | 
						|
	void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
 | 
						|
				 b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
 | 
						|
 | 
						|
	///keys only
 | 
						|
	void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32);
 | 
						|
 | 
						|
	void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
 | 
						|
	void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
 | 
						|
	void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
 | 
						|
};
 | 
						|
#endif  //B3_RADIXSORT32_H
 |