Differential D2857 Diff 15005 source/maths/Matrix3D.cpp

Changeset View

Standalone View

source/maths/Matrix3D.cpp

/* Copyright (C) 2019 Wildfire Games.		/* Copyright (C) 2021 Wildfire Games.
* This file is part of 0 A.D.		* This file is part of 0 A.D.
*		*
* 0 A.D. is free software: you can redistribute it and/or modify		* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by		* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or		* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.		* (at your option) any later version.
*		*
* 0 A.D. is distributed in the hope that it will be useful,		* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of		* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.		* GNU General Public License for more details.
*		*
* You should have received a copy of the GNU General Public License		* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.		* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/		*/

/*		/*
* A Matrix class used for holding and manipulating transformation		* A Matrix class used for holding and manipulating transformation
* info.		* info.
*/		*/

#include "precompiled.h"		#include "precompiled.h"

		#include "lib/sysdep/compiler.h"
#include "Matrix3D.h"		#include "Matrix3D.h"
#include "Quaternion.h"		#include "Quaternion.h"
#include "Vector4D.h"		#include "Vector4D.h"

		#if COMPILER_HAS_SSE
		# include "lib/sse.h"
		# include <xmmintrin.h>
		#endif

//Sets the identity matrix		//Sets the identity matrix
void CMatrix3D::SetIdentity ()		void CMatrix3D::SetIdentity ()
{		{
_11=1.0f; _12=0.0f; _13=0.0f; _14=0.0f;		_11=1.0f; _12=0.0f; _13=0.0f; _14=0.0f;
_21=0.0f; _22=1.0f; _23=0.0f; _24=0.0f;		_21=0.0f; _22=1.0f; _23=0.0f; _24=0.0f;
_31=0.0f; _32=0.0f; _33=1.0f; _34=0.0f;		_31=0.0f; _32=0.0f; _33=1.0f; _34=0.0f;
_41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f;		_41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f;
}		}
▲ Show 20 Lines • Show All 400 Lines • ▼ Show 20 Lines	float CMatrix3D::GetYRotation() const
float len = axis.Length();		float len = axis.Length();
if (len < 0.0001f)		if (len < 0.0001f)
return 0.f;		return 0.f;
axis *= 1.0f/len;		axis *= 1.0f/len;

// Negate the return angle to match the SetYRotation convention		// Negate the return angle to match the SetYRotation convention
return -atan2(axis.Z, axis.X);		return -atan2(axis.Z, axis.X);
}		}

		#if COMPILER_HAS_SSE

		// SSE matrix multiplication. This is the kernel Matrix3DActivateFastImpl()
		// installs in place of MultiplicationFallback. Each four-float group of the
		// result is a linear combination of the four groups of source, weighted by
		// four elements of matrix.
		CMatrix3D MultiplicationSSE(const CMatrix3D& source, const CMatrix3D& matrix)
		{
			// The four float groups of the left operand, loaded once and reused for
			// every output group. Unaligned loads and stores are used throughout.
			const __m128 src0 = _mm_loadu_ps(source._data2d[0]);
			const __m128 src1 = _mm_loadu_ps(source._data2d[1]);
			const __m128 src2 = _mm_loadu_ps(source._data2d[2]);
			const __m128 src3 = _mm_loadu_ps(source._data2d[3]);

			// k0*src0 + k1*src1 + k2*src2 + k3*src3, accumulated in exactly that order
			// so every output group is rounded the same way.
			const auto combine = [&src0, &src1, &src2, &src3](float k0, float k1, float k2, float k3)
			{
				__m128 acc = _mm_mul_ps(_mm_set_ps1(k0), src0);
				acc = _mm_add_ps(_mm_mul_ps(_mm_set_ps1(k1), src1), acc);
				acc = _mm_add_ps(_mm_mul_ps(_mm_set_ps1(k2), src2), acc);
				return _mm_add_ps(_mm_mul_ps(_mm_set_ps1(k3), src3), acc);
			};

			CMatrix3D product;
			_mm_storeu_ps(product._data2d[0], combine(matrix._11, matrix._21, matrix._31, matrix._41));
			_mm_storeu_ps(product._data2d[1], combine(matrix._12, matrix._22, matrix._32, matrix._42));
			_mm_storeu_ps(product._data2d[2], combine(matrix._13, matrix._23, matrix._33, matrix._43));
			_mm_storeu_ps(product._data2d[3], combine(matrix._14, matrix._24, matrix._34, matrix._44));
			return product;
		}

		// SSE kernel that Matrix3DActivateFastImpl() installs in place of
		// AddBlendFallback: adds m, scaled by f, onto the existing contents of *s.
		// Only the first three floats of every four-float group are weighted by f;
		// the fourth float of each group of m is weighted by 0.
		void AddBlendSSE(CMatrix3D* s, const CMatrix3D& m, float f)
		{
			// _mm_set_ps takes the highest lane first: lanes 0..2 hold f, lane 3 holds 0.
			const __m128 weights = _mm_set_ps(0, f, f, f);

			for (int group = 0; group < 4; ++group)
			{
				const __m128 accumulated = _mm_loadu_ps(s->_data2d[group]);
				const __m128 incoming = _mm_loadu_ps(m._data2d[group]);
				_mm_storeu_ps(s->_data2d[group], _mm_add_ps(_mm_mul_ps(incoming, weights), accumulated));
			}
		}

		// SSE kernel that Matrix3DActivateFastImpl() installs in place of
		// BlendFallback: overwrites the first three floats of every four-float group
		// of *s with the matching floats of m scaled by f, and leaves the fourth
		// float of each group of *s bit-for-bit unchanged.
		void BlendSSE(CMatrix3D* s, const CMatrix3D& m, float f)
		{
			const __m128 scale = _mm_set_ps1(f);

			// All-ones bits in lane 3, zero bits in lanes 0..2. Built with an SSE1
			// comparison so nothing beyond <xmmintrin.h> is required.
			const __m128 keepLane3 = _mm_cmpneq_ps(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f), _mm_setzero_ps());

			for (int group = 0; group < 4; ++group)
			{
				const __m128 previous = _mm_loadu_ps(s->_data2d[group]);
				const __m128 scaled = _mm_mul_ps(_mm_loadu_ps(m._data2d[group]), scale);

				// Select lanes bitwise instead of multiplying by a 0/1 float mask:
				// 0 * NaN and 0 * Inf are NaN, so stale (e.g. uninitialised) contents
				// of *s would otherwise poison the freshly written lanes, and a
				// non-finite lane 3 in m would corrupt the preserved lane.
				// BlendFallback has neither problem because it plainly assigns.
				_mm_storeu_ps(s->_data2d[group],
					_mm_or_ps(_mm_and_ps(keepLane3, previous), _mm_andnot_ps(keepLane3, scaled)));
			}
		}
		#endif

		// Portable (non-SIMD) matrix multiplication and the default target of
		// CMatrix3D::Multiply. Builds sixteen four-term sums, each pairing the elements
		// source._iK with matrix._Kj for K = 1..4, and hands them to the sixteen-argument
		// CMatrix3D constructor in the order (1,1), (1,2), (1,3), (1,4), (2,1), ...
		CMatrix3D MultiplicationFallback(const CMatrix3D& source, const CMatrix3D& matrix)
		{
			// Short aliases keep the sixteen sums readable.
			const CMatrix3D& a = source;
			const CMatrix3D& b = matrix;

			return CMatrix3D(
				// i = 1
				a._11 * b._11 + a._12 * b._21 + a._13 * b._31 + a._14 * b._41,
				a._11 * b._12 + a._12 * b._22 + a._13 * b._32 + a._14 * b._42,
				a._11 * b._13 + a._12 * b._23 + a._13 * b._33 + a._14 * b._43,
				a._11 * b._14 + a._12 * b._24 + a._13 * b._34 + a._14 * b._44,

				// i = 2
				a._21 * b._11 + a._22 * b._21 + a._23 * b._31 + a._24 * b._41,
				a._21 * b._12 + a._22 * b._22 + a._23 * b._32 + a._24 * b._42,
				a._21 * b._13 + a._22 * b._23 + a._23 * b._33 + a._24 * b._43,
				a._21 * b._14 + a._22 * b._24 + a._23 * b._34 + a._24 * b._44,

				// i = 3
				a._31 * b._11 + a._32 * b._21 + a._33 * b._31 + a._34 * b._41,
				a._31 * b._12 + a._32 * b._22 + a._33 * b._32 + a._34 * b._42,
				a._31 * b._13 + a._32 * b._23 + a._33 * b._33 + a._34 * b._43,
				a._31 * b._14 + a._32 * b._24 + a._33 * b._34 + a._34 * b._44,

				// i = 4
				a._41 * b._11 + a._42 * b._21 + a._43 * b._31 + a._44 * b._41,
				a._41 * b._12 + a._42 * b._22 + a._43 * b._32 + a._44 * b._42,
				a._41 * b._13 + a._42 * b._23 + a._43 * b._33 + a._44 * b._43,
				a._41 * b._14 + a._42 * b._24 + a._43 * b._34 + a._44 * b._44
			);
		}

		// Portable blend and the default target of CMatrix3D::BlendMat: overwrites twelve
		// elements of *s (first index 1..3, second index 1..4) with the matching elements
		// of m scaled by f. The four elements _41.._44 of *s are not touched.
		void BlendFallback(CMatrix3D* s, const CMatrix3D& m, float f)
		{
			// Each element is read once and written once, so the grouping is free;
			// here one line per first index.
			s->_11 = m._11 * f;  s->_12 = m._12 * f;  s->_13 = m._13 * f;  s->_14 = m._14 * f;
			s->_21 = m._21 * f;  s->_22 = m._22 * f;  s->_23 = m._23 * f;  s->_24 = m._24 * f;
			s->_31 = m._31 * f;  s->_32 = m._32 * f;  s->_33 = m._33 * f;  s->_34 = m._34 * f;
		}

		// Portable accumulate-blend and the default target of CMatrix3D::AddBlendMat:
		// adds the matching element of m scaled by f to twelve elements of *s (first index
		// 1..3, second index 1..4). The four elements _41.._44 of *s are not touched.
		void AddBlendFallback(CMatrix3D* s, const CMatrix3D& m, float f)
		{
			// Each element is updated independently, so the grouping is free;
			// here one line per first index.
			s->_11 += m._11 * f;  s->_12 += m._12 * f;  s->_13 += m._13 * f;  s->_14 += m._14 * f;
			s->_21 += m._21 * f;  s->_22 += m._22 * f;  s->_23 += m._23 * f;  s->_24 += m._24 * f;
			s->_31 += m._31 * f;  s->_32 += m._32 * f;  s->_33 += m._33 * f;  s->_34 += m._34 * f;
		}


		CMatrix3D(*CMatrix3D::Multiply)(const CMatrix3D& source, const CMatrix3D& matrix) = MultiplicationFallback;
		void(CMatrix3D::BlendMat)(CMatrix3D s, const CMatrix3D& m, float f) = BlendFallback;
		void(CMatrix3D::AddBlendMat)(CMatrix3D s, const CMatrix3D& m, float f) = AddBlendFallback;

		// Retargets the CMatrix3D dispatch pointers to the SSE kernels when the build
		// has COMPILER_HAS_SSE set and HostHasSSE() returns true at run time.
		// In every other case the pointers are left as they are.
		void Matrix3DActivateFastImpl()
		{
		#if COMPILER_HAS_SSE
			if (!HostHasSSE())
				return;

			CMatrix3D::Multiply = MultiplicationSSE;
			CMatrix3D::BlendMat = BlendSSE;
			CMatrix3D::AddBlendMat = AddBlendSSE;
		#endif
		}