mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-31 05:31:01 +00:00 
			
		
		
		
	 767e374dce
			
		
	
	
		767e374dce
		
	
	
	
	
		
			
			Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library.
		
			
				
	
	
		
			294 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			294 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2009-2021 Intel Corporation
 | |
| // SPDX-License-Identifier: Apache-2.0
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "geometry.h"
 | |
| #include "buffer.h"
 | |
| 
 | |
| namespace embree
 | |
| {
 | |
|   /*! Grid Mesh */
 | |
|   struct GridMesh : public Geometry
 | |
|   {
 | |
|     /*! type of this geometry */
 | |
|     static const Geometry::GTypeMask geom_type = Geometry::MTY_GRID_MESH;
 | |
| 
 | |
|     /*! grid */
 | |
|     struct Grid 
 | |
|     {
 | |
|       unsigned int startVtxID;
 | |
|       unsigned int lineVtxOffset;
 | |
|       unsigned short resX,resY;
 | |
| 
 | |
|       /* border flags due to 3x3 vertex pattern */
 | |
|       __forceinline unsigned int get3x3FlagsX(const unsigned int x) const
 | |
|       {
 | |
|         return (x + 2 >= (unsigned int)resX) ? (1<<15) : 0;
 | |
|       }
 | |
| 
 | |
|       /* border flags due to 3x3 vertex pattern */
 | |
|       __forceinline unsigned int get3x3FlagsY(const unsigned int y) const
 | |
|       {
 | |
|         return (y + 2 >= (unsigned int)resY) ? (1<<15) : 0;
 | |
|       }
 | |
| 
 | |
|       /*! outputs grid structure */
 | |
|       __forceinline friend embree_ostream operator<<(embree_ostream cout, const Grid& t) {
 | |
|         return cout << "Grid { startVtxID " << t.startVtxID << ", lineVtxOffset " << t.lineVtxOffset << ", resX " << t.resX << ", resY " << t.resY << " }";
 | |
|       }
 | |
|     };
 | |
| 
 | |
|   public:
 | |
| 
 | |
|     /*! grid mesh construction */
 | |
|     GridMesh (Device* device); 
 | |
| 
 | |
|     /* geometry interface */
 | |
|   public:
 | |
|     void setMask(unsigned mask);
 | |
|     void setNumTimeSteps (unsigned int numTimeSteps);
 | |
|     void setVertexAttributeCount (unsigned int N);
 | |
|     void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
 | |
|     void* getBuffer(RTCBufferType type, unsigned int slot);
 | |
|     void updateBuffer(RTCBufferType type, unsigned int slot);
 | |
|     void commit();
 | |
|     bool verify();
 | |
|     void interpolate(const RTCInterpolateArguments* const args);
 | |
| 
 | |
|     template<int N>
 | |
|     void interpolate_impl(const RTCInterpolateArguments* const args)
 | |
|     {
 | |
|       unsigned int primID = args->primID;
 | |
|       float U = args->u;
 | |
|       float V = args->v;
 | |
|       
 | |
|       /* clamp input u,v to [0;1] range */
 | |
|       U = max(min(U,1.0f),0.0f);
 | |
|       V = max(min(V,1.0f),0.0f);
 | |
|       
 | |
|       RTCBufferType bufferType = args->bufferType;
 | |
|       unsigned int bufferSlot = args->bufferSlot;
 | |
|       float* P = args->P;
 | |
|       float* dPdu = args->dPdu;
 | |
|       float* dPdv = args->dPdv;
 | |
|       float* ddPdudu = args->ddPdudu;
 | |
|       float* ddPdvdv = args->ddPdvdv;
 | |
|       float* ddPdudv = args->ddPdudv;
 | |
|       unsigned int valueCount = args->valueCount;
 | |
|       
 | |
|       /* calculate base pointer and stride */
 | |
|       assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) ||
 | |
|              (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size()));
 | |
|       const char* src = nullptr; 
 | |
|       size_t stride = 0;
 | |
|       if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) {
 | |
|         src    = vertexAttribs[bufferSlot].getPtr();
 | |
|         stride = vertexAttribs[bufferSlot].getStride();
 | |
|       } else {
 | |
|         src    = vertices[bufferSlot].getPtr();
 | |
|         stride = vertices[bufferSlot].getStride();
 | |
|       }
 | |
|       
 | |
|       const Grid& grid = grids[primID];
 | |
|       const int grid_width  = grid.resX-1;
 | |
|       const int grid_height = grid.resY-1;
 | |
|       const float rcp_grid_width = rcp(float(grid_width));
 | |
|       const float rcp_grid_height = rcp(float(grid_height));
 | |
|       const int iu = min((int)floor(U*grid_width ),grid_width);
 | |
|       const int iv = min((int)floor(V*grid_height),grid_height);
 | |
|       const float u = U*grid_width-float(iu);
 | |
|       const float v = V*grid_height-float(iv);
 | |
|       
 | |
|       for (unsigned int i=0; i<valueCount; i+=N)
 | |
|       {
 | |
|         const size_t ofs = i*sizeof(float);
 | |
|         const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu;
 | |
|         const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu;
 | |
|         
 | |
|         const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount));
 | |
|         const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]);
 | |
|         const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]);
 | |
|         const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]);
 | |
|         const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]);
 | |
|         const vbool<N> left = u+v <= 1.0f;
 | |
|         const vfloat<N> Q0 = select(left,p0,p2);
 | |
|         const vfloat<N> Q1 = select(left,p1,p3);
 | |
|         const vfloat<N> Q2 = select(left,p3,p1);
 | |
|         const vfloat<N> U  = select(left,u,vfloat<N>(1.0f)-u);
 | |
|         const vfloat<N> V  = select(left,v,vfloat<N>(1.0f)-v);
 | |
|         const vfloat<N> W  = 1.0f-U-V;
 | |
|         
 | |
|         if (P) {
 | |
|           mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2)));
 | |
|         }
 | |
|         if (dPdu) { 
 | |
|           assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width);
 | |
|           assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height);
 | |
|         }
 | |
|         if (ddPdudu) { 
 | |
|           assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero));
 | |
|           assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero));
 | |
|           assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero));
 | |
|         }
 | |
|       }
 | |
|     }
 | |
|     
 | |
|     void addElementsToCount (GeometryCounts & counts) const;
 | |
|     
 | |
|     __forceinline unsigned int getNumSubGrids(const size_t gridID)
 | |
|     {
 | |
|       const Grid &g = grid(gridID);
 | |
|       return max((unsigned int)1,((unsigned int)g.resX >> 1) * ((unsigned int)g.resY >> 1));
 | |
|     }
 | |
| 
 | |
|     /*! get fast access to first vertex buffer */
 | |
|     __forceinline float * getCompactVertexArray () const {
 | |
|       return (float*) vertices0.getPtr();
 | |
|     }
 | |
| 
 | |
|   public:
 | |
| 
 | |
|     /*! returns number of vertices */
 | |
|     __forceinline size_t numVertices() const {
 | |
|       return vertices[0].size();
 | |
|     }
 | |
|     
 | |
|     /*! returns i'th grid*/
 | |
|     __forceinline const Grid& grid(size_t i) const {
 | |
|       return grids[i];
 | |
|     }
 | |
| 
 | |
|     /*! returns i'th vertex of the first time step  */
 | |
|     __forceinline const Vec3fa vertex(size_t i) const { // FIXME: check if this does a unaligned load
 | |
|       return vertices0[i];
 | |
|     }
 | |
| 
 | |
|     /*! returns i'th vertex of the first time step */
 | |
|     __forceinline const char* vertexPtr(size_t i) const {
 | |
|       return vertices0.getPtr(i);
 | |
|     }
 | |
| 
 | |
|     /*! returns i'th vertex of itime'th timestep */
 | |
|     __forceinline const Vec3fa vertex(size_t i, size_t itime) const {
 | |
|       return vertices[itime][i];
 | |
|     }
 | |
| 
 | |
|     /*! returns i'th vertex of itime'th timestep */
 | |
|     __forceinline const char* vertexPtr(size_t i, size_t itime) const {
 | |
|       return vertices[itime].getPtr(i);
 | |
|     }
 | |
| 
 | |
|     /*! returns i'th vertex of the first timestep */
 | |
|     __forceinline size_t grid_vertex_index(const Grid& g, size_t x, size_t y) const {
 | |
|       assert(x < (size_t)g.resX);
 | |
|       assert(y < (size_t)g.resY);
 | |
|       return g.startVtxID + x + y * g.lineVtxOffset;
 | |
|     }
 | |
|     
 | |
|     /*! returns i'th vertex of the first timestep */
 | |
|     __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y) const {
 | |
|       const size_t index = grid_vertex_index(g,x,y);
 | |
|       return vertex(index);
 | |
|     }
 | |
| 
 | |
|     /*! returns i'th vertex of the itime'th timestep */
 | |
|     __forceinline const Vec3fa grid_vertex(const Grid& g, size_t x, size_t y, size_t itime) const {
 | |
|       const size_t index = grid_vertex_index(g,x,y);
 | |
|       return vertex(index,itime);
 | |
|     }
 | |
| 
 | |
|     /*! calculates the build bounds of the i'th primitive, if it's valid */
 | |
|     __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, BBox3fa& bbox) const
 | |
|     {
 | |
|       BBox3fa b(empty);
 | |
|       for (size_t t=0; t<numTimeSteps; t++)
 | |
|       {
 | |
|         for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
 | |
|           for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
 | |
|           {
 | |
|             const Vec3fa v = grid_vertex(g,x,y,t);
 | |
|             if (unlikely(!isvalid(v))) return false;
 | |
|             b.extend(v);
 | |
|           }
 | |
|       }
 | |
| 
 | |
|       bbox = b;
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
|     /*! calculates the build bounds of the i'th primitive at the itime'th time segment, if it's valid */
 | |
|     __forceinline bool buildBounds(const Grid& g, size_t sx, size_t sy, size_t itime, BBox3fa& bbox) const
 | |
|     {
 | |
|       assert(itime < numTimeSteps);
 | |
|       BBox3fa b0(empty);
 | |
|       for (size_t y=sy;y<min(sy+3,(size_t)g.resY);y++)
 | |
|         for (size_t x=sx;x<min(sx+3,(size_t)g.resX);x++)
 | |
|         {
 | |
|           const Vec3fa v = grid_vertex(g,x,y,itime);
 | |
|           if (unlikely(!isvalid(v))) return false;
 | |
|           b0.extend(v);
 | |
|         }
 | |
| 
 | |
|       /* use bounds of first time step in builder */
 | |
|       bbox = b0;
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
|     __forceinline bool valid(size_t gridID, size_t itime=0) const {
 | |
|       return valid(gridID, make_range(itime, itime));
 | |
|     }
 | |
| 
 | |
|     /*! check if the i'th primitive is valid between the specified time range */
 | |
|     __forceinline bool valid(size_t gridID, const range<size_t>& itime_range) const
 | |
|     {
 | |
|       if (unlikely(gridID >= grids.size())) return false;
 | |
|       const Grid &g = grid(gridID);
 | |
|       if (unlikely(g.startVtxID + 0                                     >= vertices0.size())) return false;
 | |
|       if (unlikely(g.startVtxID + (g.resY-1)*g.lineVtxOffset + g.resX-1 >= vertices0.size())) return false;
 | |
| 
 | |
|       for (size_t y=0;y<g.resY;y++)
 | |
|         for (size_t x=0;x<g.resX;x++)
 | |
|           for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++)
 | |
|             if (!isvalid(grid_vertex(g,x,y,itime))) return false;
 | |
|       return true;
 | |
|     }
 | |
| 
 | |
| 
 | |
|     __forceinline BBox3fa bounds(const Grid& g, size_t sx, size_t sy, size_t itime) const
 | |
|     {
 | |
|       BBox3fa box(empty);
 | |
|       buildBounds(g,sx,sy,itime,box);
 | |
|       return box;
 | |
|     }
 | |
| 
 | |
|     __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, size_t itime) const {
 | |
|       BBox3fa bounds0, bounds1;
 | |
|       buildBounds(g,sx,sy,itime+0,bounds0);
 | |
|       buildBounds(g,sx,sy,itime+1,bounds1);
 | |
|       return LBBox3fa(bounds0,bounds1);
 | |
|     }
 | |
| 
 | |
|     /*! calculates the linear bounds of the i'th primitive for the specified time range */
 | |
|     __forceinline LBBox3fa linearBounds(const Grid& g, size_t sx, size_t sy, const BBox1f& dt) const {
 | |
|       return LBBox3fa([&] (size_t itime) { return bounds(g,sx,sy,itime); }, dt, time_range, fnumTimeSegments);
 | |
|     }
 | |
| 
 | |
|   public:
 | |
|     BufferView<Grid> grids;      //!< array of grids
 | |
|     BufferView<Vec3fa> vertices0;        //!< fast access to first vertex buffer
 | |
|     vector<BufferView<Vec3fa>> vertices; //!< vertex array for each timestep
 | |
|     vector<RawBufferView> vertexAttribs; //!< vertex attributes
 | |
|   };
 | |
| 
 | |
|   namespace isa
 | |
|   {
 | |
|     struct GridMeshISA : public GridMesh
 | |
|     {
 | |
|       GridMeshISA (Device* device)
 | |
|         : GridMesh(device) {}
 | |
|     };
 | |
|   }
 | |
| 
 | |
|   DECLARE_ISA_FUNCTION(GridMesh*, createGridMesh, Device*);
 | |
| }
 |