mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-01 06:01:14 +00:00 
			
		
		
		
	
		
			
	
	
		
			51 lines
		
	
	
	
		
			1.3 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			51 lines
		
	
	
	
		
			1.3 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|   | // Copyright 2009-2020 Intel Corporation
 | ||
|  | // SPDX-License-Identifier: Apache-2.0
 | ||
|  | 
 | ||
|  | #pragma once
 | ||
|  | 
 | ||
|  | /* Make precision match SSE, at the cost of some performance */ | ||
|  | #if !defined(__aarch64__)
 | ||
|  | #  define SSE2NEON_PRECISE_DIV 1
 | ||
|  | #  define SSE2NEON_PRECISE_SQRT 1
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #include "sse2neon.h"
 | ||
|  | 
 | ||
|  | __forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) { | ||
|  |    __m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c))); | ||
|  |   return _mm_fmadd_ps(a, b, neg_c); | ||
|  | } | ||
|  | 
 | ||
|  | __forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { | ||
|  | #if defined(__aarch64__)
 | ||
|  |     return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c), | ||
|  |                                             vreinterpretq_f32_m128(b), | ||
|  |                                             vreinterpretq_f32_m128(a))); | ||
|  | #else
 | ||
|  |     return _mm_sub_ps(c, _mm_mul_ps(a, b)); | ||
|  | #endif
 | ||
|  | } | ||
|  | 
 | ||
|  | __forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) { | ||
|  |   return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c)))); | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | /* Dummy defines for floating point control */ | ||
|  | #define _MM_MASK_MASK 0x1f80
 | ||
|  | #define _MM_MASK_DIV_ZERO 0x200
 | ||
|  | #define _MM_FLUSH_ZERO_ON 0x8000
 | ||
|  | #define _MM_MASK_DENORM 0x100
 | ||
|  | #define _MM_SET_EXCEPTION_MASK(x)
 | ||
|  | #define _MM_SET_FLUSH_ZERO_MODE(x)
 | ||
|  | 
 | ||
|  | __forceinline int _mm_getcsr() | ||
|  | { | ||
|  |   return 0; | ||
|  | } | ||
|  | 
 | ||
|  | __forceinline void _mm_mfence() | ||
|  | { | ||
|  |   __sync_synchronize(); | ||
|  | } |