mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-31 13:41:03 +00:00 
			
		
		
		
	
		
			
	
	
		
			182 lines
		
	
	
	
		
			6.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			182 lines
		
	
	
	
		
			6.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|   | #pragma once
 | ||
|  | #ifndef __CVTT_ENDPOINTREFINER_H__
 | ||
|  | #define __CVTT_ENDPOINTREFINER_H__
 | ||
|  | 
 | ||
|  | #include "ConvectionKernels_ParallelMath.h"
 | ||
|  | 
 | ||
|  | namespace cvtt | ||
|  | { | ||
|  |     namespace Internal | ||
|  |     { | ||
|  |         // Solve for a, b where v = a*t + b
 | ||
|  |         // This allows endpoints to be mapped to where T=0 and T=1
 | ||
|  |         // Least squares from totals:
 | ||
|  |         // a = (tv - t*v/w)/(tt - t*t/w)
 | ||
|  |         // b = (v - a*t)/w
 | ||
|  |         template<int TVectorSize> | ||
|  |         class EndpointRefiner | ||
|  |         { | ||
|  |         public: | ||
|  |             typedef ParallelMath::Float MFloat; | ||
|  |             typedef ParallelMath::UInt16 MUInt16; | ||
|  |             typedef ParallelMath::UInt15 MUInt15; | ||
|  |             typedef ParallelMath::AInt16 MAInt16; | ||
|  |             typedef ParallelMath::SInt16 MSInt16; | ||
|  |             typedef ParallelMath::SInt32 MSInt32; | ||
|  | 
 | ||
|  |             MFloat m_tv[TVectorSize]; | ||
|  |             MFloat m_v[TVectorSize]; | ||
|  |             MFloat m_tt; | ||
|  |             MFloat m_t; | ||
|  |             MFloat m_w; | ||
|  |             int m_wu; | ||
|  | 
 | ||
|  |             float m_rcpMaxIndex; | ||
|  |             float m_channelWeights[TVectorSize]; | ||
|  |             float m_rcpChannelWeights[TVectorSize]; | ||
|  | 
 | ||
|  |             void Init(int indexRange, const float channelWeights[TVectorSize]) | ||
|  |             { | ||
|  |                 for (int ch = 0; ch < TVectorSize; ch++) | ||
|  |                 { | ||
|  |                     m_tv[ch] = ParallelMath::MakeFloatZero(); | ||
|  |                     m_v[ch] = ParallelMath::MakeFloatZero(); | ||
|  |                 } | ||
|  |                 m_tt = ParallelMath::MakeFloatZero(); | ||
|  |                 m_t = ParallelMath::MakeFloatZero(); | ||
|  |                 m_w = ParallelMath::MakeFloatZero(); | ||
|  | 
 | ||
|  |                 m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1); | ||
|  | 
 | ||
|  |                 for (int ch = 0; ch < TVectorSize; ch++) | ||
|  |                 { | ||
|  |                     m_channelWeights[ch] = channelWeights[ch]; | ||
|  |                     m_rcpChannelWeights[ch] = 1.0f; | ||
|  |                     if (m_channelWeights[ch] != 0.0f) | ||
|  |                         m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch]; | ||
|  |                 } | ||
|  | 
 | ||
|  |                 m_wu = 0; | ||
|  |             } | ||
|  | 
 | ||
|  |             void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight) | ||
|  |             { | ||
|  |                 MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; | ||
|  | 
 | ||
|  |                 for (int ch = 0; ch < TVectorSize; ch++) | ||
|  |                 { | ||
|  |                     MFloat v = pwFloatPixel[ch] * weight; | ||
|  | 
 | ||
|  |                     m_tv[ch] = m_tv[ch] + t * v; | ||
|  |                     m_v[ch] = m_v[ch] + v; | ||
|  |                 } | ||
|  |                 m_tt = m_tt + weight * t * t; | ||
|  |                 m_t = m_t + weight * t; | ||
|  |                 m_w = m_w + weight; | ||
|  |             } | ||
|  | 
 | ||
|  |             void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels) | ||
|  |             { | ||
|  |                 MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; | ||
|  | 
 | ||
|  |                 for (int ch = 0; ch < numRealChannels; ch++) | ||
|  |                 { | ||
|  |                     MFloat v = pwFloatPixel[ch]; | ||
|  | 
 | ||
|  |                     m_tv[ch] = m_tv[ch] + t * v; | ||
|  |                     m_v[ch] = m_v[ch] + v; | ||
|  |                 } | ||
|  |                 m_tt = m_tt + t * t; | ||
|  |                 m_t = m_t + t; | ||
|  |                 m_wu++; | ||
|  |             } | ||
|  | 
 | ||
|  |             void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index) | ||
|  |             { | ||
|  |                 ContributeUnweightedPW(floatPixel, index, TVectorSize); | ||
|  |             } | ||
|  | 
 | ||
|  |             void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize]) | ||
|  |             { | ||
|  |                 // a = (tv - t*v/w)/(tt - t*t/w)
 | ||
|  |                 // b = (v - a*t)/w
 | ||
|  |                 MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu)); | ||
|  | 
 | ||
|  |                 ParallelMath::MakeSafeDenominator(w); | ||
|  |                 MFloat wRcp = ParallelMath::Reciprocal(w); | ||
|  | 
 | ||
|  |                 MFloat adenom = (m_tt * w - m_t * m_t) * wRcp; | ||
|  | 
 | ||
|  |                 ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero()); | ||
|  |                 ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f)); | ||
|  | 
 | ||
|  |                 for (int ch = 0; ch < TVectorSize; ch++) | ||
|  |                 { | ||
|  |                     /*
 | ||
|  |                     if (adenom == 0.0) | ||
|  |                     p1 = p2 = er.v / er.w; | ||
|  |                     else | ||
|  |                     { | ||
|  |                     float4 a = (er.tv - er.t*er.v / er.w) / adenom; | ||
|  |                     float4 b = (er.v - a * er.t) / er.w; | ||
|  |                     p1 = b; | ||
|  |                     p2 = a + b; | ||
|  |                     } | ||
|  |                     */ | ||
|  | 
 | ||
|  |                     MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom; | ||
|  |                     MFloat b = (m_v[ch] - a * m_t) * wRcp; | ||
|  | 
 | ||
|  |                     MFloat p1 = b; | ||
|  |                     MFloat p2 = a + b; | ||
|  | 
 | ||
|  |                     ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp)); | ||
|  |                     ParallelMath::ConditionalSet(p2, adenomZero, p1); | ||
|  | 
 | ||
|  |                     // Unweight
 | ||
|  |                     float inverseWeight = m_rcpChannelWeights[ch]; | ||
|  | 
 | ||
|  |                     endPoint[0][ch] = p1 * inverseWeight; | ||
|  |                     endPoint[1][ch] = p2 * inverseWeight; | ||
|  |                 } | ||
|  |             } | ||
|  | 
 | ||
|  |             void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode) | ||
|  |             { | ||
|  |                 MFloat floatEndPoint[2][TVectorSize]; | ||
|  |                 GetRefinedEndpoints(floatEndPoint); | ||
|  | 
 | ||
|  |                 for (int epi = 0; epi < 2; epi++) | ||
|  |                     for (int ch = 0; ch < TVectorSize; ch++) | ||
|  |                         endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode); | ||
|  |             } | ||
|  | 
 | ||
|  |             void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode) | ||
|  |             { | ||
|  |                 GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode); | ||
|  |             } | ||
|  | 
 | ||
|  |             void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode) | ||
|  |             { | ||
|  |                 MFloat floatEndPoint[2][TVectorSize]; | ||
|  |                 GetRefinedEndpoints(floatEndPoint); | ||
|  | 
 | ||
|  |                 for (int epi = 0; epi < 2; epi++) | ||
|  |                 { | ||
|  |                     for (int ch = 0; ch < TVectorSize; ch++) | ||
|  |                     { | ||
|  |                         MFloat f = floatEndPoint[epi][ch]; | ||
|  |                         if (isSigned) | ||
|  |                             endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode)); | ||
|  |                         else | ||
|  |                             endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode)); | ||
|  |                     } | ||
|  |                 } | ||
|  |             } | ||
|  |         }; | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | #endif
 | ||
|  | 
 |