mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-30 21:21:10 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			443 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			443 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2009-2021 Intel Corporation
 | |
| // SPDX-License-Identifier: Apache-2.0
 | |
| 
 | |
| // TODO: 
 | |
| //       - adjust parallel build thresholds
 | |
| //       - openNodesBasedOnExtend should consider max extended size
 | |
|   
 | |
| #pragma once
 | |
| 
 | |
| #include "heuristic_binning.h"
 | |
| #include "heuristic_spatial.h"
 | |
| 
 | |
/* stop opening if all bref.geomIDs are the same */
 | |
| #define EQUAL_GEOMID_STOP_CRITERIA 1
 | |
| 
 | |
| /* 10% spatial extend threshold */
 | |
| #define MAX_EXTEND_THRESHOLD   0.1f
 | |
| 
 | |
| /* maximum is 8 children */
 | |
| #define MAX_OPENED_CHILD_NODES 8
 | |
| 
 | |
| /* open until all build refs are below threshold size in one step */
 | |
| #define USE_LOOP_OPENING 0
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   namespace isa
 | |
|   { 
 | |
|     /*! Performs standard object binning */
 | |
|     template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
 | |
|       struct HeuristicArrayOpenMergeSAH
 | |
|       {
 | |
|         typedef BinSplit<OBJECT_BINS> Split;
 | |
|         typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
 | |
|         
 | |
|         static const size_t PARALLEL_THRESHOLD = 1024;
 | |
|         static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
 | |
|         static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
 | |
| 
 | |
|         static const size_t MOVE_STEP_SIZE = 64;
 | |
|         static const size_t CREATE_SPLITS_STEP_SIZE = 128;
 | |
| 
 | |
|         __forceinline HeuristicArrayOpenMergeSAH ()
 | |
|           : prims0(nullptr) {}
 | |
|         
 | |
|         /*! remember prim array */
 | |
|         __forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
 | |
|           : prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size) 
 | |
|         {
 | |
|           assert(max_open_size <= MAX_OPENED_CHILD_NODES);
 | |
|         }
 | |
| 
 | |
|         struct OpenHeuristic
 | |
|         {
 | |
|           __forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
 | |
|           {
 | |
|             const Vec3fa diag = pinfo.geomBounds.size();
 | |
|             dim = maxDim(diag);
 | |
|             assert(diag[dim] > 0.0f);
 | |
|             inv_max_extend = 1.0f / diag[dim];
 | |
|           }
 | |
| 
 | |
|           __forceinline bool operator () ( PrimRef& prim ) const {
 | |
|             return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
 | |
|           }
 | |
| 
 | |
|         private:
 | |
|           size_t dim;
 | |
|           float inv_max_extend;
 | |
|         };
 | |
| 
 | |
|         /*! compute extended ranges */
 | |
|         __forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
 | |
|         {
 | |
|           assert(set.ext_range_size() > 0);
 | |
|           const float left_factor           = (float)lweight / (lweight + rweight);
 | |
|           const size_t ext_range_size       = set.ext_range_size();
 | |
|           const size_t left_ext_range_size  = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
 | |
|           const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
 | |
|           lset.set_ext_range(lset.end() + left_ext_range_size);
 | |
|           rset.set_ext_range(rset.end() + right_ext_range_size);
 | |
|         }
 | |
| 
 | |
|         /*! move ranges */
 | |
|         __forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
 | |
|         {
 | |
|           const size_t left_ext_range_size = lset.ext_range_size();
 | |
|           const size_t right_size = rset.size();
 | |
| 
 | |
|           /* has the left child an extended range? */
 | |
|           if (left_ext_range_size > 0)
 | |
|           {
 | |
|             /* left extended range smaller than right range ? */
 | |
|             if (left_ext_range_size < right_size)
 | |
|             {
 | |
|               /* only move a small part of the beginning of the right range to the end */
 | |
|               parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {                  
 | |
|                   for (size_t i=r.begin(); i<r.end(); i++)
 | |
|                     prims0[i+right_size] = prims0[i];
 | |
|                 });
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|               /* no overlap, move entire right range to new location, can be made fully parallel */
 | |
|               parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE,  [&](const range<size_t>& r) {
 | |
|                   for (size_t i=r.begin(); i<r.end(); i++)
 | |
|                     prims0[i+left_ext_range_size] = prims0[i];
 | |
|                 });
 | |
|             }
 | |
|             /* update right range */
 | |
|             assert(rset.ext_end() + left_ext_range_size == set.ext_end());
 | |
|             rset.move_right(left_ext_range_size);
 | |
|           }
 | |
|         }
 | |
| 
 | |
|         /* estimates the extra space required when opening, and checks if all primitives are from same geometry */
 | |
|         __noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
 | |
|         {
 | |
|           const OpenHeuristic heuristic(set);
 | |
|           const unsigned int geomID = prims0[set.begin()].geomID();
 | |
|           
 | |
|           auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> { 
 | |
|             bool commonGeomID = true;
 | |
|             size_t opens = 0;
 | |
|             for (size_t i=r.begin(); i<r.end(); i++) {
 | |
|               commonGeomID &= prims0[i].geomID() == geomID; 
 | |
|               if (heuristic(prims0[i]))
 | |
|                 opens += prims0[i].node.getN()-1; // coarse approximation
 | |
|             }
 | |
|             return std::pair<size_t,bool>(opens,commonGeomID); 
 | |
|           };
 | |
|           auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> { 
 | |
|             return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second); 
 | |
|           };
 | |
|           return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
 | |
|         }
 | |
| 
 | |
|         // FIXME: should consider maximum available extended size 
 | |
|         __noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
 | |
|         {
 | |
|           const OpenHeuristic heuristic(set);
 | |
|           const size_t ext_range_start = set.end();
 | |
| 
 | |
|           if (false && set.size() < PARALLEL_THRESHOLD) 
 | |
|           {
 | |
|             size_t extra_elements = 0;
 | |
|             for (size_t i=set.begin(); i<set.end(); i++)
 | |
|             {
 | |
|               if (heuristic(prims0[i]))
 | |
|               {
 | |
|                 PrimRef tmp[MAX_OPENED_CHILD_NODES];
 | |
|                 const size_t n = nodeOpenerFunc(prims0[i],tmp);
 | |
|                 assert(extra_elements + n-1 <= set.ext_range_size());
 | |
|                 for (size_t j=0; j<n; j++)
 | |
|                   set.extend_center2(tmp[j]);
 | |
| 
 | |
|                 prims0[i] = tmp[0];
 | |
|                 for (size_t j=1; j<n; j++)
 | |
|                   prims0[ext_range_start+extra_elements+j-1] = tmp[j]; 
 | |
|                 extra_elements += n-1;
 | |
|               }
 | |
|             }
 | |
|             set._end += extra_elements;
 | |
|           }
 | |
|           else 
 | |
|           {
 | |
|             std::atomic<size_t> ext_elements;
 | |
|             ext_elements.store(0);
 | |
|             PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
 | |
|                 PrimInfo info(empty);
 | |
|                 for (size_t i=r.begin(); i<r.end(); i++)
 | |
|                   if (heuristic(prims0[i]))
 | |
|                   {
 | |
|                     PrimRef tmp[MAX_OPENED_CHILD_NODES];
 | |
|                     const size_t n = nodeOpenerFunc(prims0[i],tmp);
 | |
|                     const size_t ID = ext_elements.fetch_add(n-1);
 | |
|                     assert(ID + n-1 <= set.ext_range_size());
 | |
| 
 | |
|                     for (size_t j=0; j<n; j++)
 | |
|                       info.extend_center2(tmp[j]);
 | |
| 
 | |
|                     prims0[i] = tmp[0];
 | |
|                     for (size_t j=1; j<n; j++)
 | |
|                       prims0[ext_range_start+ID+j-1] = tmp[j]; 
 | |
|                   }
 | |
|                 return info;
 | |
|               }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
 | |
|             set.centBounds.extend(info.centBounds);
 | |
|             assert(ext_elements.load() <= set.ext_range_size());
 | |
|             set._end += ext_elements.load();
 | |
|           }
 | |
|         } 
 | |
| 
 | |
|         __noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
 | |
|         {
 | |
|           const OpenHeuristic heuristic(set);
 | |
|           size_t next_iteration_extra_elements = est_new_elements;          
 | |
|           
 | |
|           while (next_iteration_extra_elements <= set.ext_range_size()) 
 | |
|           {
 | |
|             next_iteration_extra_elements = 0;
 | |
|             size_t extra_elements = 0;
 | |
|             const size_t ext_range_start = set.end();
 | |
| 
 | |
|             for (size_t i=set.begin(); i<set.end(); i++)
 | |
|             {
 | |
|               if (heuristic(prims0[i]))
 | |
|               {
 | |
|                 PrimRef tmp[MAX_OPENED_CHILD_NODES];
 | |
|                 const size_t n = nodeOpenerFunc(prims0[i],tmp);
 | |
|                 assert(extra_elements + n-1 <= set.ext_range_size());
 | |
|                 for (size_t j=0;j<n;j++)
 | |
|                   set.extend_center2(tmp[j]);
 | |
|                   
 | |
|                 prims0[i] = tmp[0];
 | |
|                 for (size_t j=1;j<n;j++)
 | |
|                   prims0[ext_range_start+extra_elements+j-1] = tmp[j]; 
 | |
|                 extra_elements += n-1;
 | |
| 
 | |
|                 for (size_t j=0; j<n; j++)
 | |
|                   if (heuristic(tmp[j]))
 | |
|                     next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
 | |
| 
 | |
|               }
 | |
|             }
 | |
|             assert( extra_elements <= set.ext_range_size());
 | |
|             set._end += extra_elements;
 | |
| 
 | |
|             for (size_t i=set.begin();i<set.end();i++)
 | |
|               assert(prims0[i].numPrimitives() > 0);
 | |
| 
 | |
|             if (unlikely(next_iteration_extra_elements == 0)) break;
 | |
|           }
 | |
|         } 
 | |
| 
 | |
|         __noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
 | |
|         {
 | |
|           /* single element */
 | |
|           if (set.size() <= 1)
 | |
|             return Split();
 | |
| 
 | |
|           /* disable opening if there is no overlap */
 | |
|           const size_t D = 4;
 | |
|           if (unlikely(set.has_ext_range() && set.size() <= D))
 | |
|           {
 | |
|             bool disjoint = true;
 | |
|             for (size_t j=set.begin(); j<set.end()-1; j++) {
 | |
|               for (size_t i=set.begin()+1; i<set.end(); i++) {
 | |
|                 if (conjoint(prims0[j].bounds(),prims0[i].bounds())) { 
 | |
|                   disjoint = false; break; 
 | |
|                 }
 | |
|               }
 | |
|             }
 | |
|             if (disjoint) set.set_ext_range(set.end()); /* disables opening */
 | |
|           }
 | |
| 
 | |
|           std::pair<size_t,bool> p(0,false);
 | |
| 
 | |
|           /* disable opening when all primitives are from same geometry */
 | |
|           if (unlikely(set.has_ext_range()))
 | |
|           {
 | |
|             p =  getProperties(set);
 | |
| #if EQUAL_GEOMID_STOP_CRITERIA == 1
 | |
|             if (p.second) set.set_ext_range(set.end()); /* disable opening */
 | |
| #endif         
 | |
|           }
 | |
| 
 | |
|           /* open nodes when we have sufficient space available */
 | |
|           if (unlikely(set.has_ext_range()))
 | |
|           {
 | |
| #if USE_LOOP_OPENING == 1
 | |
|             openNodesBasedOnExtendLoop(set,p.first);
 | |
| #else
 | |
|             if (p.first <= set.ext_range_size())
 | |
|               openNodesBasedOnExtend(set);
 | |
| #endif
 | |
| 
 | |
|             /* disable opening when insufficient space for opening a node available */
 | |
|             if (set.ext_range_size() < max_open_size-1) 
 | |
|               set.set_ext_range(set.end()); /* disable opening */
 | |
|           }
 | |
|                     
 | |
|           /* find best split */
 | |
|           return object_find(set,logBlockSize);
 | |
|         }
 | |
| 
 | |
| 
 | |
|         /*! finds the best object split */
 | |
|         __forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
 | |
|         {
 | |
|           if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
 | |
|           else                                 return parallel_object_find  (set,logBlockSize);
 | |
|         }
 | |
| 
 | |
|         /*! finds the best object split */
 | |
|         __noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
 | |
|         {
 | |
|           Binner binner(empty); 
 | |
|           const BinMapping<OBJECT_BINS> mapping(set.centBounds);
 | |
|           binner.bin(prims0,set.begin(),set.end(),mapping);
 | |
|           return binner.best(mapping,logBlockSize);
 | |
|         }
 | |
| 
 | |
|         /*! finds the best split */
 | |
|         __noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
 | |
|         {
 | |
|           Binner binner(empty);
 | |
|           const BinMapping<OBJECT_BINS> mapping(set.centBounds);
 | |
|           const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
 | |
|           auto body = [&] (const range<size_t>& r) -> Binner { 
 | |
|             Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; 
 | |
|           };
 | |
|           auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner { 
 | |
|             Binner r = b0; r.merge(b1,_mapping.size()); return r; 
 | |
|           };
 | |
|           binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
 | |
|           return binner.best(mapping,logBlockSize);
 | |
|         }
 | |
|         
 | |
|         /*! array partitioning */
 | |
|         __noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset) 
 | |
|         {
 | |
|           PrimInfoExtRange set = set_i;
 | |
| 
 | |
|           /* valid split */
 | |
|           if (unlikely(!split.valid())) {
 | |
|             deterministic_order(set);
 | |
|             splitFallback(set,lset,rset);
 | |
|             return;
 | |
|           }
 | |
| 
 | |
|           std::pair<size_t,size_t> ext_weights(0,0);
 | |
| 
 | |
|           /* object split */
 | |
|           if (likely(set.size() < PARALLEL_THRESHOLD)) 
 | |
|             ext_weights = sequential_object_split(split,set,lset,rset);
 | |
|           else
 | |
|             ext_weights = parallel_object_split(split,set,lset,rset);
 | |
| 
 | |
|           /* if we have an extended range, set extended child ranges and move right split range */
 | |
|           if (unlikely(set.has_ext_range())) 
 | |
|           {
 | |
|             setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
 | |
|             moveExtentedRange(set,lset,rset);
 | |
|           }
 | |
|         }
 | |
| 
 | |
|         /*! array partitioning */
 | |
|         std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset) 
 | |
|         {
 | |
|           const size_t begin = set.begin();
 | |
|           const size_t end   = set.end();
 | |
|           PrimInfo local_left(empty);
 | |
|           PrimInfo local_right(empty);
 | |
|           const unsigned int splitPos = split.pos;
 | |
|           const unsigned int splitDim = split.dim;
 | |
|           const unsigned int splitDimMask = (unsigned int)1 << splitDim; 
 | |
| 
 | |
|           const vint4 vSplitPos(splitPos);
 | |
|           const vbool4 vSplitMask( (int)splitDimMask );
 | |
| 
 | |
|           size_t center = serial_partitioning(prims0,
 | |
|                                               begin,end,local_left,local_right,
 | |
|                                               [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
 | |
|                                               [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });          
 | |
|           
 | |
|           new (&lset) PrimInfoExtRange(begin,center,center,local_left);
 | |
|           new (&rset) PrimInfoExtRange(center,end,end,local_right);
 | |
|           assert(area(lset.geomBounds) >= 0.0f);
 | |
|           assert(area(rset.geomBounds) >= 0.0f);
 | |
|           return std::pair<size_t,size_t>(local_left.size(),local_right.size());
 | |
|         }
 | |
| 
 | |
|         /*! array partitioning */
 | |
|         __noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
 | |
|         {
 | |
|           const size_t begin = set.begin();
 | |
|           const size_t end   = set.end();
 | |
|           PrimInfo left(empty);
 | |
|           PrimInfo right(empty);
 | |
|           const unsigned int splitPos = split.pos;
 | |
|           const unsigned int splitDim = split.dim;
 | |
|           const unsigned int splitDimMask = (unsigned int)1 << splitDim;
 | |
| 
 | |
|           const vint4 vSplitPos(splitPos);
 | |
|           const vbool4 vSplitMask( (int)splitDimMask );
 | |
|           auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
 | |
| 
 | |
|           const size_t center = parallel_partitioning(
 | |
|             prims0,begin,end,EmptyTy(),left,right,isLeft,
 | |
|             [] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
 | |
|             [] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
 | |
|             PARALLEL_PARTITION_BLOCK_SIZE);
 | |
| 
 | |
|           new (&lset) PrimInfoExtRange(begin,center,center,left);
 | |
|           new (&rset) PrimInfoExtRange(center,end,end,right);
 | |
|           assert(area(lset.geomBounds) >= 0.0f);
 | |
|           assert(area(rset.geomBounds) >= 0.0f);
 | |
| 
 | |
|           return std::pair<size_t,size_t>(left.size(),right.size());
 | |
|         }
 | |
| 
 | |
|         void deterministic_order(const extended_range<size_t>& set) 
 | |
|         {
 | |
|           /* required as parallel partition destroys original primitive order */
 | |
|           std::sort(&prims0[set.begin()],&prims0[set.end()]);
 | |
|         }
 | |
| 
 | |
|         __forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
 | |
|         {
 | |
|           const size_t begin = set.begin();
 | |
|           const size_t end   = set.end();
 | |
|           const size_t center = (begin + end)/2;
 | |
| 
 | |
|           PrimInfo left(empty);
 | |
|           for (size_t i=begin; i<center; i++)
 | |
|             left.add_center2(prims0[i]);
 | |
| 
 | |
|           const size_t lweight = left.end;
 | |
|           
 | |
|           PrimInfo right(empty);
 | |
|           for (size_t i=center; i<end; i++)
 | |
|             right.add_center2(prims0[i]);	
 | |
| 
 | |
|           const size_t rweight = right.end;
 | |
|           new (&lset) PrimInfoExtRange(begin,center,center,left);
 | |
|           new (&rset) PrimInfoExtRange(center,end,end,right);
 | |
| 
 | |
|           /* if we have an extended range */
 | |
|           if (set.has_ext_range()) 
 | |
|           {
 | |
|             setExtentedRanges(set,lset,rset,lweight,rweight);
 | |
|             moveExtentedRange(set,lset,rset);
 | |
|           }
 | |
|         }
 | |
|         
 | |
|       private:
 | |
|         PrimRef* const prims0;
 | |
|         const NodeOpenerFunc& nodeOpenerFunc;
 | |
|         size_t max_open_size;
 | |
|       };
 | |
|   }
 | |
| }
 | 
