mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-26 03:04:31 +00:00 
			
		
		
		
	 767e374dce
			
		
	
	
		767e374dce
		
	
	
	
	
		
			
			Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library.
		
			
				
	
	
		
			283 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2009-2021 Intel Corporation
 | |
| // SPDX-License-Identifier: Apache-2.0
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "parallel_for.h"
 | |
| #include "../math/range.h"
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   /* serial partitioning */
 | |
|   template<typename T, typename V, typename IsLeft, typename Reduction_T>
 | |
|     __forceinline size_t serial_partitioning(T* array, 
 | |
|                                              const size_t begin,
 | |
|                                              const size_t end, 
 | |
|                                              V& leftReduction,
 | |
|                                              V& rightReduction,
 | |
|                                              const IsLeft& is_left, 
 | |
|                                              const Reduction_T& reduction_t)
 | |
|   {
 | |
|     T* l = array + begin;
 | |
|     T* r = array + end - 1;
 | |
|     
 | |
|     while(1)
 | |
|     {
 | |
|       /* *l < pivot */
 | |
|       while (likely(l <= r && is_left(*l) )) 
 | |
|       {
 | |
|         //prefetchw(l+4); // FIXME: enable?
 | |
|         reduction_t(leftReduction,*l);
 | |
|         ++l;
 | |
|       }
 | |
|       /* *r >= pivot) */
 | |
|       while (likely(l <= r && !is_left(*r)))
 | |
|       {
 | |
|         //prefetchw(r-4); FIXME: enable?
 | |
|         reduction_t(rightReduction,*r);
 | |
|         --r;
 | |
|       }
 | |
|       if (r<l) break;
 | |
|       
 | |
|       reduction_t(leftReduction ,*r);
 | |
|       reduction_t(rightReduction,*l);
 | |
|       xchg(*l,*r);
 | |
|       l++; r--;
 | |
|     }
 | |
|     
 | |
|     return l - array;        
 | |
|   }
 | |
| 
 | |
|   template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
 | |
|     class __aligned(64) parallel_partition_task
 | |
|   {
 | |
|     ALIGNED_CLASS_(64);
 | |
|   private:
 | |
| 
 | |
|     static const size_t MAX_TASKS = 64;
 | |
| 
 | |
|     T* array;
 | |
|     size_t N;
 | |
|     const IsLeft& is_left;
 | |
|     const Reduction_T& reduction_t;
 | |
|     const Reduction_V& reduction_v;
 | |
|     const Vi& identity;
 | |
| 
 | |
|     size_t numTasks; 
 | |
|     __aligned(64) size_t counter_start[MAX_TASKS+1]; 
 | |
|     __aligned(64) size_t counter_left[MAX_TASKS+1];  
 | |
|     __aligned(64) range<ssize_t> leftMisplacedRanges[MAX_TASKS];  
 | |
|     __aligned(64) range<ssize_t> rightMisplacedRanges[MAX_TASKS]; 
 | |
|     __aligned(64) V leftReductions[MAX_TASKS];           
 | |
|     __aligned(64) V rightReductions[MAX_TASKS];    
 | |
| 
 | |
|   public:
 | |
|      
 | |
|     __forceinline parallel_partition_task(T* array, 
 | |
|                                           const size_t N, 
 | |
|                                           const Vi& identity, 
 | |
|                                           const IsLeft& is_left, 
 | |
|                                           const Reduction_T& reduction_t, 
 | |
|                                           const Reduction_V& reduction_v,
 | |
|                                           const size_t BLOCK_SIZE) 
 | |
| 
 | |
|       : array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity),
 | |
|       numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {}
 | |
| 
 | |
|     __forceinline const range<ssize_t>* findStartRange(size_t& index, const range<ssize_t>* const r, const size_t numRanges)
 | |
|     {
 | |
|       size_t i = 0;
 | |
|       while(index >= (size_t)r[i].size())
 | |
|       {
 | |
|         assert(i < numRanges);
 | |
|         index -= (size_t)r[i].size();
 | |
|         i++;
 | |
|       }	    
 | |
|       return &r[i];
 | |
|     }
 | |
| 
 | |
|     __forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges,
 | |
|                                                   const size_t numRightMisplacedRanges,
 | |
|                                                   const size_t startID,
 | |
|                                                   const size_t endID)
 | |
|     {
 | |
|       size_t leftLocalIndex  = startID;
 | |
|       size_t rightLocalIndex = startID;
 | |
|       const range<ssize_t>* l_range = findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges);
 | |
|       const range<ssize_t>* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges);
 | |
|       
 | |
|       size_t l_left = l_range->size() - leftLocalIndex;
 | |
|       size_t r_left = r_range->size() - rightLocalIndex;
 | |
|       T *__restrict__ l = &array[l_range->begin() + leftLocalIndex];
 | |
|       T *__restrict__ r = &array[r_range->begin() + rightLocalIndex];
 | |
|       size_t size = endID - startID;
 | |
|       size_t items = min(size,min(l_left,r_left)); 
 | |
|      
 | |
|       while (size)
 | |
|       {
 | |
|         if (unlikely(l_left == 0))
 | |
|         {
 | |
|           l_range++;
 | |
|           l_left = l_range->size();
 | |
|           l = &array[l_range->begin()];
 | |
|           items = min(size,min(l_left,r_left));
 | |
|         }
 | |
| 
 | |
|         if (unlikely(r_left == 0))
 | |
|         {		
 | |
|           r_range++;
 | |
|           r_left = r_range->size();
 | |
|           r = &array[r_range->begin()];          
 | |
|           items = min(size,min(l_left,r_left));
 | |
|         }
 | |
| 
 | |
|         size   -= items;
 | |
|         l_left -= items;
 | |
|         r_left -= items;
 | |
| 
 | |
|         while(items) {
 | |
|           items--;
 | |
|           xchg(*l++,*r++);
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     __forceinline size_t partition(V& leftReduction, V& rightReduction)
 | |
|     {
 | |
|       /* partition the individual ranges for each task */
 | |
|       parallel_for(numTasks,[&] (const size_t taskID) {
 | |
|           const size_t startID = (taskID+0)*N/numTasks;
 | |
|           const size_t endID   = (taskID+1)*N/numTasks;
 | |
|           V local_left(identity);
 | |
|           V local_right(identity);
 | |
|           const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t);
 | |
|           counter_start[taskID] = startID;
 | |
|           counter_left [taskID] = mid-startID;
 | |
|           leftReductions[taskID]  = local_left;
 | |
|           rightReductions[taskID] = local_right;
 | |
|         });
 | |
|       counter_start[numTasks] = N;
 | |
|       counter_left[numTasks]  = 0;
 | |
|       
 | |
|       /* finalize the reductions */
 | |
|       for (size_t i=0; i<numTasks; i++) {
 | |
|         reduction_v(leftReduction,leftReductions[i]);
 | |
|         reduction_v(rightReduction,rightReductions[i]);
 | |
|       }
 | |
| 
 | |
|       /* calculate mid point for partitioning */
 | |
|       size_t mid = counter_left[0];
 | |
|       for (size_t i=1; i<numTasks; i++)
 | |
|         mid += counter_left[i];
 | |
|       const range<ssize_t> globalLeft (0,mid);
 | |
|       const range<ssize_t> globalRight(mid,N);
 | |
| 
 | |
|       /* calculate all left and right ranges that are on the wrong global side */
 | |
|       size_t numMisplacedRangesLeft  = 0;
 | |
|       size_t numMisplacedRangesRight = 0;
 | |
|       size_t numMisplacedItemsLeft   = 0;
 | |
|       size_t numMisplacedItemsRight  = 0;
 | |
| 
 | |
|       for (size_t i=0; i<numTasks; i++)
 | |
|       {	    
 | |
|         const range<ssize_t> left_range (counter_start[i], counter_start[i] + counter_left[i]);
 | |
|         const range<ssize_t> right_range(counter_start[i] + counter_left[i], counter_start[i+1]);
 | |
|         const range<ssize_t> left_misplaced  = globalLeft. intersect(right_range);
 | |
|         const range<ssize_t> right_misplaced = globalRight.intersect(left_range);
 | |
| 
 | |
|         if (!left_misplaced.empty())  
 | |
|         {
 | |
|           numMisplacedItemsLeft += left_misplaced.size();
 | |
|           leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced;
 | |
|         }
 | |
| 
 | |
|         if (!right_misplaced.empty()) 
 | |
|         {
 | |
|           numMisplacedItemsRight += right_misplaced.size();
 | |
|           rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced;
 | |
|         }
 | |
|       }
 | |
|       assert( numMisplacedItemsLeft == numMisplacedItemsRight );
 | |
| 
 | |
|       /* if no items are misplaced we are done */
 | |
|       if (numMisplacedItemsLeft == 0)
 | |
|         return mid;
 | |
| 
 | |
|       /* otherwise we copy the items to the right place in parallel */
 | |
|       parallel_for(numTasks,[&] (const size_t taskID) {
 | |
|           const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks;
 | |
|           const size_t endID   = (taskID+1)*numMisplacedItemsLeft/numTasks;
 | |
|           swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID);	                             
 | |
|         });
 | |
| 
 | |
|       return mid;
 | |
|     }
 | |
|   };
 | |
| 
 | |
|   template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
 | |
|     __noinline size_t parallel_partitioning(T* array, 
 | |
|                                             const size_t begin,
 | |
|                                             const size_t end, 
 | |
|                                             const Vi &identity,
 | |
|                                             V &leftReduction,
 | |
|                                             V &rightReduction,
 | |
|                                             const IsLeft& is_left, 
 | |
|                                             const Reduction_T& reduction_t,
 | |
|                                             const Reduction_V& reduction_v,
 | |
|                                             size_t BLOCK_SIZE = 128)
 | |
|   {
 | |
|     /* fall back to single threaded partitioning for small N */
 | |
|     if (unlikely(end-begin < BLOCK_SIZE))
 | |
|       return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
 | |
| 
 | |
|     /* otherwise use parallel code */
 | |
|     else {
 | |
|       typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
 | |
|       std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
 | |
|       return begin+p->partition(leftReduction,rightReduction);    
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
 | |
|     __noinline size_t parallel_partitioning(T* array, 
 | |
|                                             const size_t begin,
 | |
|                                             const size_t end, 
 | |
|                                             const Vi &identity,
 | |
|                                             V &leftReduction,
 | |
|                                             V &rightReduction,
 | |
|                                             const IsLeft& is_left, 
 | |
|                                             const Reduction_T& reduction_t,
 | |
|                                             const Reduction_V& reduction_v,
 | |
|                                             size_t BLOCK_SIZE,
 | |
|                                             size_t PARALLEL_THRESHOLD)
 | |
|   {
 | |
|     /* fall back to single threaded partitioning for small N */
 | |
|     if (unlikely(end-begin < PARALLEL_THRESHOLD))
 | |
|       return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
 | |
| 
 | |
|     /* otherwise use parallel code */
 | |
|     else {
 | |
|       typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
 | |
|       std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
 | |
|       return begin+p->partition(leftReduction,rightReduction);    
 | |
|     }
 | |
|   }
 | |
| 
 | |
| 
 | |
|   template<typename T, typename IsLeft>
 | |
|     inline size_t parallel_partitioning(T* array, 
 | |
|                                         const size_t begin,
 | |
|                                         const size_t end, 
 | |
|                                         const IsLeft& is_left, 
 | |
|                                         size_t BLOCK_SIZE = 128)
 | |
|   {
 | |
|     size_t leftReduction = 0;
 | |
|     size_t rightReduction = 0;
 | |
|     return parallel_partitioning(
 | |
|       array,begin,end,0,leftReduction,rightReduction,is_left,
 | |
|       [] (size_t& t,const T& ref) {  },
 | |
|       [] (size_t& t0,size_t& t1) { },
 | |
|       BLOCK_SIZE);
 | |
|   }
 | |
| 
 | |
| }
 |