mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-04 07:31:16 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1021 lines
		
	
	
	
		
			31 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1021 lines
		
	
	
	
		
			31 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
 | 
						|
 | 
						|
This software is provided 'as-is', without any express or implied warranty.
 | 
						|
In no event will the authors be held liable for any damages arising from the use of this software.
 | 
						|
Permission is granted to anyone to use this software for any purpose, 
 | 
						|
including commercial applications, and to alter it and redistribute it freely, 
 | 
						|
subject to the following restrictions:
 | 
						|
 | 
						|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 | 
						|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 | 
						|
3. This notice may not be removed or altered from any source distribution.
 | 
						|
*/
 | 
						|
 | 
						|
#ifndef BT_SIMD__QUATERNION_H_
 | 
						|
#define BT_SIMD__QUATERNION_H_
 | 
						|
 | 
						|
#include "btVector3.h"
 | 
						|
#include "btQuadWord.h"
 | 
						|
 | 
						|
#ifdef BT_USE_DOUBLE_PRECISION
 | 
						|
#define btQuaternionData btQuaternionDoubleData
 | 
						|
#define btQuaternionDataName "btQuaternionDoubleData"
 | 
						|
#else
 | 
						|
#define btQuaternionData btQuaternionFloatData
 | 
						|
#define btQuaternionDataName "btQuaternionFloatData"
 | 
						|
#endif  //BT_USE_DOUBLE_PRECISION
 | 
						|
 | 
						|
#ifdef BT_USE_SSE
 | 
						|
 | 
						|
//const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
 | 
						|
#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
#if defined(BT_USE_SSE)
 | 
						|
 | 
						|
#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
 | 
						|
#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
 | 
						|
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
 | 
						|
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
 | 
						|
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
/**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
 | 
						|
class btQuaternion : public btQuadWord
 | 
						|
{
 | 
						|
public:
 | 
						|
	/**@brief No initialization constructor */
 | 
						|
	btQuaternion() {}
 | 
						|
 | 
						|
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
 | 
						|
	// Set Vector
 | 
						|
	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
 | 
						|
	{
 | 
						|
		mVec128 = vec;
 | 
						|
	}
 | 
						|
 | 
						|
	// Copy constructor
 | 
						|
	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
 | 
						|
	{
 | 
						|
		mVec128 = rhs.mVec128;
 | 
						|
	}
 | 
						|
 | 
						|
	// Assignment Operator
 | 
						|
	SIMD_FORCE_INLINE btQuaternion&
 | 
						|
	operator=(const btQuaternion& v)
 | 
						|
	{
 | 
						|
		mVec128 = v.mVec128;
 | 
						|
 | 
						|
		return *this;
 | 
						|
	}
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
	//		template <typename btScalar>
 | 
						|
	//		explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
 | 
						|
	/**@brief Constructor from scalars */
 | 
						|
	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
 | 
						|
		: btQuadWord(_x, _y, _z, _w)
 | 
						|
	{
 | 
						|
	}
 | 
						|
	/**@brief Axis angle Constructor
 | 
						|
   * @param axis The axis which the rotation is around
 | 
						|
   * @param angle The magnitude of the rotation around the angle (Radians) */
 | 
						|
	btQuaternion(const btVector3& _axis, const btScalar& _angle)
 | 
						|
	{
 | 
						|
		setRotation(_axis, _angle);
 | 
						|
	}
 | 
						|
	/**@brief Constructor from Euler angles
 | 
						|
   * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
 | 
						|
   * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
 | 
						|
   * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
 | 
						|
	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
 | 
						|
	{
 | 
						|
#ifndef BT_EULER_DEFAULT_ZYX
 | 
						|
		setEuler(yaw, pitch, roll);
 | 
						|
#else
 | 
						|
		setEulerZYX(yaw, pitch, roll);
 | 
						|
#endif
 | 
						|
	}
 | 
						|
	/**@brief Set the rotation using axis angle notation 
 | 
						|
   * @param axis The axis around which to rotate
 | 
						|
   * @param angle The magnitude of the rotation in Radians */
 | 
						|
	void setRotation(const btVector3& axis, const btScalar& _angle)
 | 
						|
	{
 | 
						|
		btScalar d = axis.length();
 | 
						|
		btAssert(d != btScalar(0.0));
 | 
						|
		btScalar s = btSin(_angle * btScalar(0.5)) / d;
 | 
						|
		setValue(axis.x() * s, axis.y() * s, axis.z() * s,
 | 
						|
				 btCos(_angle * btScalar(0.5)));
 | 
						|
	}
 | 
						|
	/**@brief Set the quaternion using Euler angles
 | 
						|
   * @param yaw Angle around Y
 | 
						|
   * @param pitch Angle around X
 | 
						|
   * @param roll Angle around Z */
 | 
						|
	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
 | 
						|
	{
 | 
						|
		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
 | 
						|
		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
 | 
						|
		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
 | 
						|
		btScalar cosYaw = btCos(halfYaw);
 | 
						|
		btScalar sinYaw = btSin(halfYaw);
 | 
						|
		btScalar cosPitch = btCos(halfPitch);
 | 
						|
		btScalar sinPitch = btSin(halfPitch);
 | 
						|
		btScalar cosRoll = btCos(halfRoll);
 | 
						|
		btScalar sinRoll = btSin(halfRoll);
 | 
						|
		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
 | 
						|
				 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
 | 
						|
				 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
 | 
						|
				 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
 | 
						|
	}
 | 
						|
	/**@brief Set the quaternion using euler angles 
 | 
						|
   * @param yaw Angle around Z
 | 
						|
   * @param pitch Angle around Y
 | 
						|
   * @param roll Angle around X */
 | 
						|
	void setEulerZYX(const btScalar& yawZ, const btScalar& pitchY, const btScalar& rollX)
 | 
						|
	{
 | 
						|
		btScalar halfYaw = btScalar(yawZ) * btScalar(0.5);
 | 
						|
		btScalar halfPitch = btScalar(pitchY) * btScalar(0.5);
 | 
						|
		btScalar halfRoll = btScalar(rollX) * btScalar(0.5);
 | 
						|
		btScalar cosYaw = btCos(halfYaw);
 | 
						|
		btScalar sinYaw = btSin(halfYaw);
 | 
						|
		btScalar cosPitch = btCos(halfPitch);
 | 
						|
		btScalar sinPitch = btSin(halfPitch);
 | 
						|
		btScalar cosRoll = btCos(halfRoll);
 | 
						|
		btScalar sinRoll = btSin(halfRoll);
 | 
						|
		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,   //x
 | 
						|
				 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,   //y
 | 
						|
				 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,   //z
 | 
						|
				 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);  //formerly yzx
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Get the euler angles from this quaternion
 | 
						|
	   * @param yaw Angle around Z
 | 
						|
	   * @param pitch Angle around Y
 | 
						|
	   * @param roll Angle around X */
 | 
						|
	void getEulerZYX(btScalar& yawZ, btScalar& pitchY, btScalar& rollX) const
 | 
						|
	{
 | 
						|
		btScalar squ;
 | 
						|
		btScalar sqx;
 | 
						|
		btScalar sqy;
 | 
						|
		btScalar sqz;
 | 
						|
		btScalar sarg;
 | 
						|
		sqx = m_floats[0] * m_floats[0];
 | 
						|
		sqy = m_floats[1] * m_floats[1];
 | 
						|
		sqz = m_floats[2] * m_floats[2];
 | 
						|
		squ = m_floats[3] * m_floats[3];
 | 
						|
		sarg = btScalar(-2.) * (m_floats[0] * m_floats[2] - m_floats[3] * m_floats[1]);
 | 
						|
 | 
						|
		// If the pitch angle is PI/2 or -PI/2, we can only compute
 | 
						|
		// the sum roll + yaw.  However, any combination that gives
 | 
						|
		// the right sum will produce the correct orientation, so we
 | 
						|
		// set rollX = 0 and compute yawZ.
 | 
						|
		if (sarg <= -btScalar(0.99999))
 | 
						|
		{
 | 
						|
			pitchY = btScalar(-0.5) * SIMD_PI;
 | 
						|
			rollX = 0;
 | 
						|
			yawZ = btScalar(2) * btAtan2(m_floats[0], -m_floats[1]);
 | 
						|
		}
 | 
						|
		else if (sarg >= btScalar(0.99999))
 | 
						|
		{
 | 
						|
			pitchY = btScalar(0.5) * SIMD_PI;
 | 
						|
			rollX = 0;
 | 
						|
			yawZ = btScalar(2) * btAtan2(-m_floats[0], m_floats[1]);
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			pitchY = btAsin(sarg);
 | 
						|
			rollX = btAtan2(2 * (m_floats[1] * m_floats[2] + m_floats[3] * m_floats[0]), squ - sqx - sqy + sqz);
 | 
						|
			yawZ = btAtan2(2 * (m_floats[0] * m_floats[1] + m_floats[3] * m_floats[2]), squ + sqx - sqy - sqz);
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Add two quaternions
 | 
						|
   * @param q The quaternion to add to this one */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		mVec128 = _mm_add_ps(mVec128, q.mVec128);
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		mVec128 = vaddq_f32(mVec128, q.mVec128);
 | 
						|
#else
 | 
						|
		m_floats[0] += q.x();
 | 
						|
		m_floats[1] += q.y();
 | 
						|
		m_floats[2] += q.z();
 | 
						|
		m_floats[3] += q.m_floats[3];
 | 
						|
#endif
 | 
						|
		return *this;
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Subtract out a quaternion
 | 
						|
   * @param q The quaternion to subtract from this one */
 | 
						|
	btQuaternion& operator-=(const btQuaternion& q)
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		mVec128 = vsubq_f32(mVec128, q.mVec128);
 | 
						|
#else
 | 
						|
		m_floats[0] -= q.x();
 | 
						|
		m_floats[1] -= q.y();
 | 
						|
		m_floats[2] -= q.z();
 | 
						|
		m_floats[3] -= q.m_floats[3];
 | 
						|
#endif
 | 
						|
		return *this;
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Scale this quaternion
 | 
						|
   * @param s The scalar to scale by */
 | 
						|
	btQuaternion& operator*=(const btScalar& s)
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
 | 
						|
		vs = bt_pshufd_ps(vs, 0);     //	(S S S S)
 | 
						|
		mVec128 = _mm_mul_ps(mVec128, vs);
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		mVec128 = vmulq_n_f32(mVec128, s);
 | 
						|
#else
 | 
						|
		m_floats[0] *= s;
 | 
						|
		m_floats[1] *= s;
 | 
						|
		m_floats[2] *= s;
 | 
						|
		m_floats[3] *= s;
 | 
						|
#endif
 | 
						|
		return *this;
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Multiply this quaternion by q on the right
 | 
						|
   * @param q The other quaternion 
 | 
						|
   * Equivilant to this = this * q */
 | 
						|
	btQuaternion& operator*=(const btQuaternion& q)
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		__m128 vQ2 = q.get128();
 | 
						|
 | 
						|
		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0, 1, 2, 0));
 | 
						|
		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));
 | 
						|
 | 
						|
		A1 = A1 * B1;
 | 
						|
 | 
						|
		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 1));
 | 
						|
		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
 | 
						|
 | 
						|
		A2 = A2 * B2;
 | 
						|
 | 
						|
		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2, 0, 1, 2));
 | 
						|
		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
 | 
						|
 | 
						|
		B1 = B1 * B2;  //	A3 *= B3
 | 
						|
 | 
						|
		mVec128 = bt_splat_ps(mVec128, 3);  //	A0
 | 
						|
		mVec128 = mVec128 * vQ2;            //	A0 * B0
 | 
						|
 | 
						|
		A1 = A1 + A2;                //	AB12
 | 
						|
		mVec128 = mVec128 - B1;      //	AB03 = AB0 - AB3
 | 
						|
		A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
 | 
						|
		mVec128 = mVec128 + A1;      //	AB03 + AB12
 | 
						|
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
 | 
						|
		float32x4_t vQ1 = mVec128;
 | 
						|
		float32x4_t vQ2 = q.get128();
 | 
						|
		float32x4_t A0, A1, B1, A2, B2, A3, B3;
 | 
						|
		float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 | 
						|
 | 
						|
		{
 | 
						|
			float32x2x2_t tmp;
 | 
						|
			tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 | 
						|
			vQ1zx = tmp.val[0];
 | 
						|
 | 
						|
			tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 | 
						|
			vQ2zx = tmp.val[0];
 | 
						|
		}
 | 
						|
		vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 | 
						|
 | 
						|
		vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 | 
						|
 | 
						|
		vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 | 
						|
		vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 | 
						|
 | 
						|
		A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
 | 
						|
		B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 | 
						|
 | 
						|
		A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 | 
						|
		B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 | 
						|
 | 
						|
		A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 | 
						|
		B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 | 
						|
 | 
						|
		A1 = vmulq_f32(A1, B1);
 | 
						|
		A2 = vmulq_f32(A2, B2);
 | 
						|
		A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
 | 
						|
		A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
 | 
						|
 | 
						|
		A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
 | 
						|
		A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
 | 
						|
 | 
						|
		//	change the sign of the last element
 | 
						|
		A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 | 
						|
		A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
 | 
						|
 | 
						|
		mVec128 = A0;
 | 
						|
#else
 | 
						|
		setValue(
 | 
						|
			m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
 | 
						|
			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
 | 
						|
			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
 | 
						|
			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
 | 
						|
#endif
 | 
						|
		return *this;
 | 
						|
	}
 | 
						|
	/**@brief Return the dot product between this quaternion and another
 | 
						|
   * @param q The other quaternion */
 | 
						|
	btScalar dot(const btQuaternion& q) const
 | 
						|
	{
 | 
						|
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		__m128 vd;
 | 
						|
 | 
						|
		vd = _mm_mul_ps(mVec128, q.mVec128);
 | 
						|
 | 
						|
		__m128 t = _mm_movehl_ps(vd, vd);
 | 
						|
		vd = _mm_add_ps(vd, t);
 | 
						|
		t = _mm_shuffle_ps(vd, vd, 0x55);
 | 
						|
		vd = _mm_add_ss(vd, t);
 | 
						|
 | 
						|
		return _mm_cvtss_f32(vd);
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
 | 
						|
		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
 | 
						|
		x = vpadd_f32(x, x);
 | 
						|
		return vget_lane_f32(x, 0);
 | 
						|
#else
 | 
						|
		return m_floats[0] * q.x() +
 | 
						|
			   m_floats[1] * q.y() +
 | 
						|
			   m_floats[2] * q.z() +
 | 
						|
			   m_floats[3] * q.m_floats[3];
 | 
						|
#endif
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the length squared of the quaternion */
 | 
						|
	btScalar length2() const
 | 
						|
	{
 | 
						|
		return dot(*this);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the length of the quaternion */
 | 
						|
	btScalar length() const
 | 
						|
	{
 | 
						|
		return btSqrt(length2());
 | 
						|
	}
 | 
						|
	btQuaternion& safeNormalize()
 | 
						|
	{
 | 
						|
		btScalar l2 = length2();
 | 
						|
		if (l2 > SIMD_EPSILON)
 | 
						|
		{
 | 
						|
			normalize();
 | 
						|
		}
 | 
						|
		return *this;
 | 
						|
	}
 | 
						|
	/**@brief Normalize the quaternion 
 | 
						|
   * Such that x^2 + y^2 + z^2 +w^2 = 1 */
 | 
						|
	btQuaternion& normalize()
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		__m128 vd;
 | 
						|
 | 
						|
		vd = _mm_mul_ps(mVec128, mVec128);
 | 
						|
 | 
						|
		__m128 t = _mm_movehl_ps(vd, vd);
 | 
						|
		vd = _mm_add_ps(vd, t);
 | 
						|
		t = _mm_shuffle_ps(vd, vd, 0x55);
 | 
						|
		vd = _mm_add_ss(vd, t);
 | 
						|
 | 
						|
		vd = _mm_sqrt_ss(vd);
 | 
						|
		vd = _mm_div_ss(vOnes, vd);
 | 
						|
		vd = bt_pshufd_ps(vd, 0);  // splat
 | 
						|
		mVec128 = _mm_mul_ps(mVec128, vd);
 | 
						|
 | 
						|
		return *this;
 | 
						|
#else
 | 
						|
		return *this /= length();
 | 
						|
#endif
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return a scaled version of this quaternion
 | 
						|
   * @param s The scale factor */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion
 | 
						|
	operator*(const btScalar& s) const
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
 | 
						|
		vs = bt_pshufd_ps(vs, 0x00);  //	(S S S S)
 | 
						|
 | 
						|
		return btQuaternion(_mm_mul_ps(mVec128, vs));
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		return btQuaternion(vmulq_n_f32(mVec128, s));
 | 
						|
#else
 | 
						|
		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
 | 
						|
#endif
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return an inversely scaled versionof this quaternion
 | 
						|
   * @param s The inverse scale factor */
 | 
						|
	btQuaternion operator/(const btScalar& s) const
 | 
						|
	{
 | 
						|
		btAssert(s != btScalar(0.0));
 | 
						|
		return *this * (btScalar(1.0) / s);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Inversely scale this quaternion
 | 
						|
   * @param s The scale factor */
 | 
						|
	btQuaternion& operator/=(const btScalar& s)
 | 
						|
	{
 | 
						|
		btAssert(s != btScalar(0.0));
 | 
						|
		return *this *= btScalar(1.0) / s;
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return a normalized version of this quaternion */
 | 
						|
	btQuaternion normalized() const
 | 
						|
	{
 | 
						|
		return *this / length();
 | 
						|
	}
 | 
						|
	/**@brief Return the ***half*** angle between this quaternion and the other
 | 
						|
   * @param q The other quaternion */
 | 
						|
	btScalar angle(const btQuaternion& q) const
 | 
						|
	{
 | 
						|
		btScalar s = btSqrt(length2() * q.length2());
 | 
						|
		btAssert(s != btScalar(0.0));
 | 
						|
		return btAcos(dot(q) / s);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the angle between this quaternion and the other along the shortest path
 | 
						|
	* @param q The other quaternion */
 | 
						|
	btScalar angleShortestPath(const btQuaternion& q) const
 | 
						|
	{
 | 
						|
		btScalar s = btSqrt(length2() * q.length2());
 | 
						|
		btAssert(s != btScalar(0.0));
 | 
						|
		if (dot(q) < 0)  // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
 | 
						|
			return btAcos(dot(-q) / s) * btScalar(2.0);
 | 
						|
		else
 | 
						|
			return btAcos(dot(q) / s) * btScalar(2.0);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the angle [0, 2Pi] of rotation represented by this quaternion */
 | 
						|
	btScalar getAngle() const
 | 
						|
	{
 | 
						|
		btScalar s = btScalar(2.) * btAcos(m_floats[3]);
 | 
						|
		return s;
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path */
 | 
						|
	btScalar getAngleShortestPath() const
 | 
						|
	{
 | 
						|
		btScalar s;
 | 
						|
		if (m_floats[3] >= 0)
 | 
						|
			s = btScalar(2.) * btAcos(m_floats[3]);
 | 
						|
		else
 | 
						|
			s = btScalar(2.) * btAcos(-m_floats[3]);
 | 
						|
		return s;
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the axis of the rotation represented by this quaternion */
 | 
						|
	btVector3 getAxis() const
 | 
						|
	{
 | 
						|
		btScalar s_squared = 1.f - m_floats[3] * m_floats[3];
 | 
						|
 | 
						|
		if (s_squared < btScalar(10.) * SIMD_EPSILON)  //Check for divide by zero
 | 
						|
			return btVector3(1.0, 0.0, 0.0);           // Arbitrary
 | 
						|
		btScalar s = 1.f / btSqrt(s_squared);
 | 
						|
		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the inverse of this quaternion */
 | 
						|
	btQuaternion inverse() const
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
 | 
						|
#else
 | 
						|
		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
 | 
						|
#endif
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the sum of this quaternion and the other 
 | 
						|
   * @param q2 The other quaternion */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion
 | 
						|
	operator+(const btQuaternion& q2) const
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
 | 
						|
#else
 | 
						|
		const btQuaternion& q1 = *this;
 | 
						|
		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
 | 
						|
#endif
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the difference between this quaternion and the other 
 | 
						|
   * @param q2 The other quaternion */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion
 | 
						|
	operator-(const btQuaternion& q2) const
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
 | 
						|
#else
 | 
						|
		const btQuaternion& q1 = *this;
 | 
						|
		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
 | 
						|
#endif
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the negative of this quaternion 
 | 
						|
   * This simply negates each element */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion operator-() const
 | 
						|
	{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask));
 | 
						|
#else
 | 
						|
		const btQuaternion& q2 = *this;
 | 
						|
		return btQuaternion(-q2.x(), -q2.y(), -q2.z(), -q2.m_floats[3]);
 | 
						|
#endif
 | 
						|
	}
 | 
						|
	/**@todo document this and it's use */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion farthest(const btQuaternion& qd) const
 | 
						|
	{
 | 
						|
		btQuaternion diff, sum;
 | 
						|
		diff = *this - qd;
 | 
						|
		sum = *this + qd;
 | 
						|
		if (diff.dot(diff) > sum.dot(sum))
 | 
						|
			return qd;
 | 
						|
		return (-qd);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@todo document this and it's use */
 | 
						|
	SIMD_FORCE_INLINE btQuaternion nearest(const btQuaternion& qd) const
 | 
						|
	{
 | 
						|
		btQuaternion diff, sum;
 | 
						|
		diff = *this - qd;
 | 
						|
		sum = *this + qd;
 | 
						|
		if (diff.dot(diff) < sum.dot(sum))
 | 
						|
			return qd;
 | 
						|
		return (-qd);
 | 
						|
	}
 | 
						|
 | 
						|
	/**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
 | 
						|
   * @param q The other quaternion to interpolate with 
 | 
						|
   * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
 | 
						|
   * Slerp interpolates assuming constant velocity.  */
 | 
						|
	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
 | 
						|
	{
 | 
						|
		const btScalar magnitude = btSqrt(length2() * q.length2());
 | 
						|
		btAssert(magnitude > btScalar(0));
 | 
						|
 | 
						|
		const btScalar product = dot(q) / magnitude;
 | 
						|
		const btScalar absproduct = btFabs(product);
 | 
						|
 | 
						|
		if (absproduct < btScalar(1.0 - SIMD_EPSILON))
 | 
						|
		{
 | 
						|
			// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
 | 
						|
			const btScalar theta = btAcos(absproduct);
 | 
						|
			const btScalar d = btSin(theta);
 | 
						|
			btAssert(d > btScalar(0));
 | 
						|
 | 
						|
			const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
 | 
						|
			const btScalar s0 = btSin((btScalar(1.0) - t) * theta) / d;
 | 
						|
			const btScalar s1 = btSin(sign * t * theta) / d;
 | 
						|
 | 
						|
			return btQuaternion(
 | 
						|
				(m_floats[0] * s0 + q.x() * s1),
 | 
						|
				(m_floats[1] * s0 + q.y() * s1),
 | 
						|
				(m_floats[2] * s0 + q.z() * s1),
 | 
						|
				(m_floats[3] * s0 + q.w() * s1));
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			return *this;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	static const btQuaternion& getIdentity()
 | 
						|
	{
 | 
						|
		static const btQuaternion identityQuat(btScalar(0.), btScalar(0.), btScalar(0.), btScalar(1.));
 | 
						|
		return identityQuat;
 | 
						|
	}
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionFloatData& dataIn);
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionDoubleData& dataIn);
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn);
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
 | 
						|
 | 
						|
	SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn);
 | 
						|
};
 | 
						|
 | 
						|
/**@brief Return the product of two quaternions */
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
operator*(const btQuaternion& q1, const btQuaternion& q2)
 | 
						|
{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
	__m128 vQ1 = q1.get128();
 | 
						|
	__m128 vQ2 = q2.get128();
 | 
						|
	__m128 A0, A1, B1, A2, B2;
 | 
						|
 | 
						|
	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0));  // X Y  z x     //      vtrn
 | 
						|
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));  // W W  W X     // vdup vext
 | 
						|
 | 
						|
	A1 = A1 * B1;
 | 
						|
 | 
						|
	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));  // Y Z  X Y     // vext
 | 
						|
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));  // z x  Y Y     // vtrn vdup
 | 
						|
 | 
						|
	A2 = A2 * B2;
 | 
						|
 | 
						|
	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));  // z x Y Z      // vtrn vext
 | 
						|
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));  // Y Z x z      // vext vtrn
 | 
						|
 | 
						|
	B1 = B1 * B2;  //	A3 *= B3
 | 
						|
 | 
						|
	A0 = bt_splat_ps(vQ1, 3);  //	A0
 | 
						|
	A0 = A0 * vQ2;             //	A0 * B0
 | 
						|
 | 
						|
	A1 = A1 + A2;  //	AB12
 | 
						|
	A0 = A0 - B1;  //	AB03 = AB0 - AB3
 | 
						|
 | 
						|
	A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
 | 
						|
	A0 = A0 + A1;                //	AB03 + AB12
 | 
						|
 | 
						|
	return btQuaternion(A0);
 | 
						|
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
 | 
						|
	float32x4_t vQ1 = q1.get128();
 | 
						|
	float32x4_t vQ2 = q2.get128();
 | 
						|
	float32x4_t A0, A1, B1, A2, B2, A3, B3;
 | 
						|
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 | 
						|
 | 
						|
	{
 | 
						|
		float32x2x2_t tmp;
 | 
						|
		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 | 
						|
		vQ1zx = tmp.val[0];
 | 
						|
 | 
						|
		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 | 
						|
		vQ2zx = tmp.val[0];
 | 
						|
	}
 | 
						|
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 | 
						|
 | 
						|
	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 | 
						|
 | 
						|
	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 | 
						|
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 | 
						|
 | 
						|
	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
 | 
						|
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 | 
						|
 | 
						|
	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 | 
						|
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 | 
						|
 | 
						|
	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 | 
						|
	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 | 
						|
 | 
						|
	A1 = vmulq_f32(A1, B1);
 | 
						|
	A2 = vmulq_f32(A2, B2);
 | 
						|
	A3 = vmulq_f32(A3, B3);                           //	A3 *= B3
 | 
						|
	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);  //	A0 * B0
 | 
						|
 | 
						|
	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
 | 
						|
	A0 = vsubq_f32(A0, A3);  //	AB03 = AB0 - AB3
 | 
						|
 | 
						|
	//	change the sign of the last element
 | 
						|
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 | 
						|
	A0 = vaddq_f32(A0, A1);  //	AB03 + AB12
 | 
						|
 | 
						|
	return btQuaternion(A0);
 | 
						|
 | 
						|
#else
 | 
						|
	return btQuaternion(
 | 
						|
		q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
 | 
						|
		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
 | 
						|
		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
 | 
						|
		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
operator*(const btQuaternion& q, const btVector3& w)
 | 
						|
{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
	__m128 vQ1 = q.get128();
 | 
						|
	__m128 vQ2 = w.get128();
 | 
						|
	__m128 A1, B1, A2, B2, A3, B3;
 | 
						|
 | 
						|
	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3, 3, 3, 0));
 | 
						|
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0, 1, 2, 0));
 | 
						|
 | 
						|
	A1 = A1 * B1;
 | 
						|
 | 
						|
	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
 | 
						|
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
 | 
						|
 | 
						|
	A2 = A2 * B2;
 | 
						|
 | 
						|
	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
 | 
						|
	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
 | 
						|
 | 
						|
	A3 = A3 * B3;  //	A3 *= B3
 | 
						|
 | 
						|
	A1 = A1 + A2;                //	AB12
 | 
						|
	A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
 | 
						|
	A1 = A1 - A3;                //	AB123 = AB12 - AB3
 | 
						|
 | 
						|
	return btQuaternion(A1);
 | 
						|
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
 | 
						|
	float32x4_t vQ1 = q.get128();
 | 
						|
	float32x4_t vQ2 = w.get128();
 | 
						|
	float32x4_t A1, B1, A2, B2, A3, B3;
 | 
						|
	float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
 | 
						|
 | 
						|
	vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
 | 
						|
	{
 | 
						|
		float32x2x2_t tmp;
 | 
						|
 | 
						|
		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 | 
						|
		vQ2zx = tmp.val[0];
 | 
						|
 | 
						|
		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 | 
						|
		vQ1zx = tmp.val[0];
 | 
						|
	}
 | 
						|
 | 
						|
	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 | 
						|
 | 
						|
	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 | 
						|
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 | 
						|
 | 
						|
	A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);  // W W  W X
 | 
						|
	B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                     // X Y  z x
 | 
						|
 | 
						|
	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 | 
						|
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 | 
						|
 | 
						|
	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 | 
						|
	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 | 
						|
 | 
						|
	A1 = vmulq_f32(A1, B1);
 | 
						|
	A2 = vmulq_f32(A2, B2);
 | 
						|
	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
 | 
						|
 | 
						|
	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
 | 
						|
 | 
						|
	//	change the sign of the last element
 | 
						|
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 | 
						|
 | 
						|
	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
 | 
						|
 | 
						|
	return btQuaternion(A1);
 | 
						|
 | 
						|
#else
 | 
						|
	return btQuaternion(
 | 
						|
		q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
 | 
						|
		q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
 | 
						|
		q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
 | 
						|
		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
operator*(const btVector3& w, const btQuaternion& q)
 | 
						|
{
 | 
						|
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
	__m128 vQ1 = w.get128();
 | 
						|
	__m128 vQ2 = q.get128();
 | 
						|
	__m128 A1, B1, A2, B2, A3, B3;
 | 
						|
 | 
						|
	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0, 1, 2, 0));  // X Y  z x
 | 
						|
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3, 3, 3, 0));  // W W  W X
 | 
						|
 | 
						|
	A1 = A1 * B1;
 | 
						|
 | 
						|
	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1, 2, 0, 1));
 | 
						|
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2, 0, 1, 1));
 | 
						|
 | 
						|
	A2 = A2 * B2;
 | 
						|
 | 
						|
	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2, 0, 1, 2));
 | 
						|
	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1, 2, 0, 2));
 | 
						|
 | 
						|
	A3 = A3 * B3;  //	A3 *= B3
 | 
						|
 | 
						|
	A1 = A1 + A2;                //	AB12
 | 
						|
	A1 = _mm_xor_ps(A1, vPPPM);  //	change sign of the last element
 | 
						|
	A1 = A1 - A3;                //	AB123 = AB12 - AB3
 | 
						|
 | 
						|
	return btQuaternion(A1);
 | 
						|
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
 | 
						|
	float32x4_t vQ1 = w.get128();
 | 
						|
	float32x4_t vQ2 = q.get128();
 | 
						|
	float32x4_t A1, B1, A2, B2, A3, B3;
 | 
						|
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
 | 
						|
 | 
						|
	{
 | 
						|
		float32x2x2_t tmp;
 | 
						|
 | 
						|
		tmp = vtrn_f32(vget_high_f32(vQ1), vget_low_f32(vQ1));  // {z x}, {w y}
 | 
						|
		vQ1zx = tmp.val[0];
 | 
						|
 | 
						|
		tmp = vtrn_f32(vget_high_f32(vQ2), vget_low_f32(vQ2));  // {z x}, {w y}
 | 
						|
		vQ2zx = tmp.val[0];
 | 
						|
	}
 | 
						|
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
 | 
						|
 | 
						|
	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
 | 
						|
 | 
						|
	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
 | 
						|
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
 | 
						|
 | 
						|
	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                     // X Y  z x
 | 
						|
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);  // W W  W X
 | 
						|
 | 
						|
	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
 | 
						|
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
 | 
						|
 | 
						|
	A3 = vcombine_f32(vQ1zx, vQ1yz);  // Z X Y Z
 | 
						|
	B3 = vcombine_f32(vQ2yz, vQ2xz);  // Y Z x z
 | 
						|
 | 
						|
	A1 = vmulq_f32(A1, B1);
 | 
						|
	A2 = vmulq_f32(A2, B2);
 | 
						|
	A3 = vmulq_f32(A3, B3);  //	A3 *= B3
 | 
						|
 | 
						|
	A1 = vaddq_f32(A1, A2);  //	AB12 = AB1 + AB2
 | 
						|
 | 
						|
	//	change the sign of the last element
 | 
						|
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
 | 
						|
 | 
						|
	A1 = vsubq_f32(A1, A3);  //	AB123 = AB12 - AB3
 | 
						|
 | 
						|
	return btQuaternion(A1);
 | 
						|
 | 
						|
#else
 | 
						|
	return btQuaternion(
 | 
						|
		+w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
 | 
						|
		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
 | 
						|
		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
 | 
						|
		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
/**@brief Calculate the dot product between two quaternions */
 | 
						|
SIMD_FORCE_INLINE btScalar
 | 
						|
dot(const btQuaternion& q1, const btQuaternion& q2)
 | 
						|
{
 | 
						|
	return q1.dot(q2);
 | 
						|
}
 | 
						|
 | 
						|
/**@brief Return the length of a quaternion */
 | 
						|
SIMD_FORCE_INLINE btScalar
 | 
						|
length(const btQuaternion& q)
 | 
						|
{
 | 
						|
	return q.length();
 | 
						|
}
 | 
						|
 | 
						|
/**@brief Return the angle between two quaternions*/
 | 
						|
SIMD_FORCE_INLINE btScalar
 | 
						|
btAngle(const btQuaternion& q1, const btQuaternion& q2)
 | 
						|
{
 | 
						|
	return q1.angle(q2);
 | 
						|
}
 | 
						|
 | 
						|
/**@brief Return the inverse of a quaternion*/
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
inverse(const btQuaternion& q)
 | 
						|
{
 | 
						|
	return q.inverse();
 | 
						|
}
 | 
						|
 | 
						|
/**@brief Return the result of spherical linear interpolation betwen two quaternions 
 | 
						|
 * @param q1 The first quaternion
 | 
						|
 * @param q2 The second quaternion 
 | 
						|
 * @param t The ration between q1 and q2.  t = 0 return q1, t=1 returns q2 
 | 
						|
 * Slerp assumes constant velocity between positions. */
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
 | 
						|
{
 | 
						|
	return q1.slerp(q2, t);
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE btVector3
 | 
						|
quatRotate(const btQuaternion& rotation, const btVector3& v)
 | 
						|
{
 | 
						|
	btQuaternion q = rotation * v;
 | 
						|
	q *= rotation.inverse();
 | 
						|
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 | 
						|
	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
 | 
						|
#elif defined(BT_USE_NEON)
 | 
						|
	return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
 | 
						|
#else
 | 
						|
	return btVector3(q.getX(), q.getY(), q.getZ());
 | 
						|
#endif
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
shortestArcQuat(const btVector3& v0, const btVector3& v1)  // Game Programming Gems 2.10. make sure v0,v1 are normalized
 | 
						|
{
 | 
						|
	btVector3 c = v0.cross(v1);
 | 
						|
	btScalar d = v0.dot(v1);
 | 
						|
 | 
						|
	if (d < -1.0 + SIMD_EPSILON)
 | 
						|
	{
 | 
						|
		btVector3 n, unused;
 | 
						|
		btPlaneSpace1(v0, n, unused);
 | 
						|
		return btQuaternion(n.x(), n.y(), n.z(), 0.0f);  // just pick any vector that is orthogonal to v0
 | 
						|
	}
 | 
						|
 | 
						|
	btScalar s = btSqrt((1.0f + d) * 2.0f);
 | 
						|
	btScalar rs = 1.0f / s;
 | 
						|
 | 
						|
	return btQuaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE btQuaternion
 | 
						|
shortestArcQuatNormalize2(btVector3& v0, btVector3& v1)
 | 
						|
{
 | 
						|
	v0.normalize();
 | 
						|
	v1.normalize();
 | 
						|
	return shortestArcQuat(v0, v1);
 | 
						|
}
 | 
						|
 | 
						|
struct btQuaternionFloatData
 | 
						|
{
 | 
						|
	float m_floats[4];
 | 
						|
};
 | 
						|
 | 
						|
struct btQuaternionDoubleData
 | 
						|
{
 | 
						|
	double m_floats[4];
 | 
						|
};
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const
 | 
						|
{
 | 
						|
	///could also do a memcpy, check if it is worth it
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		dataOut.m_floats[i] = float(m_floats[i]);
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn)
 | 
						|
{
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		m_floats[i] = btScalar(dataIn.m_floats[i]);
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const
 | 
						|
{
 | 
						|
	///could also do a memcpy, check if it is worth it
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		dataOut.m_floats[i] = double(m_floats[i]);
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn)
 | 
						|
{
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		m_floats[i] = btScalar(dataIn.m_floats[i]);
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const
 | 
						|
{
 | 
						|
	///could also do a memcpy, check if it is worth it
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		dataOut.m_floats[i] = m_floats[i];
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionFloatData& dataIn)
 | 
						|
{
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		m_floats[i] = (btScalar)dataIn.m_floats[i];
 | 
						|
}
 | 
						|
 | 
						|
SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionDoubleData& dataIn)
 | 
						|
{
 | 
						|
	for (int i = 0; i < 4; i++)
 | 
						|
		m_floats[i] = (btScalar)dataIn.m_floats[i];
 | 
						|
}
 | 
						|
 | 
						|
#endif  //BT_SIMD__QUATERNION_H_
 |