Changeset View
Changeset View
Standalone View
Standalone View
source/maths/Matrix3D.cpp
/* Copyright (C) 2019 Wildfire Games. | /* Copyright (C) 2021 Wildfire Games. | ||||
* This file is part of 0 A.D. | * This file is part of 0 A.D. | ||||
* | * | ||||
* 0 A.D. is free software: you can redistribute it and/or modify | * 0 A.D. is free software: you can redistribute it and/or modify | ||||
* it under the terms of the GNU General Public License as published by | * it under the terms of the GNU General Public License as published by | ||||
* the Free Software Foundation, either version 2 of the License, or | * the Free Software Foundation, either version 2 of the License, or | ||||
* (at your option) any later version. | * (at your option) any later version. | ||||
* | * | ||||
* 0 A.D. is distributed in the hope that it will be useful, | * 0 A.D. is distributed in the hope that it will be useful, | ||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
* GNU General Public License for more details. | * GNU General Public License for more details. | ||||
* | * | ||||
* You should have received a copy of the GNU General Public License | * You should have received a copy of the GNU General Public License | ||||
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. | * along with 0 A.D. If not, see <http://www.gnu.org/licenses/>. | ||||
*/ | */ | ||||
/* | /* | ||||
* A Matrix class used for holding and manipulating transformation | * A Matrix class used for holding and manipulating transformation | ||||
* info. | * info. | ||||
*/ | */ | ||||
#include "precompiled.h" | #include "precompiled.h" | ||||
#include "lib/sysdep/compiler.h" | |||||
#include "Matrix3D.h" | #include "Matrix3D.h" | ||||
#include "Quaternion.h" | #include "Quaternion.h" | ||||
#include "Vector4D.h" | #include "Vector4D.h" | ||||
#if COMPILER_HAS_SSE | |||||
# include "lib/sse.h" | |||||
# include <xmmintrin.h> | |||||
#endif | |||||
//Sets the identity matrix | //Sets the identity matrix | ||||
void CMatrix3D::SetIdentity () | void CMatrix3D::SetIdentity () | ||||
{ | { | ||||
_11=1.0f; _12=0.0f; _13=0.0f; _14=0.0f; | _11=1.0f; _12=0.0f; _13=0.0f; _14=0.0f; | ||||
_21=0.0f; _22=1.0f; _23=0.0f; _24=0.0f; | _21=0.0f; _22=1.0f; _23=0.0f; _24=0.0f; | ||||
_31=0.0f; _32=0.0f; _33=1.0f; _34=0.0f; | _31=0.0f; _32=0.0f; _33=1.0f; _34=0.0f; | ||||
_41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; | _41=0.0f; _42=0.0f; _43=0.0f; _44=1.0f; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 400 Lines • ▼ Show 20 Lines | float CMatrix3D::GetYRotation() const | ||||
float len = axis.Length(); | float len = axis.Length(); | ||||
if (len < 0.0001f) | if (len < 0.0001f) | ||||
return 0.f; | return 0.f; | ||||
axis *= 1.0f/len; | axis *= 1.0f/len; | ||||
// Negate the return angle to match the SetYRotation convention | // Negate the return angle to match the SetYRotation convention | ||||
return -atan2(axis.Z, axis.X); | return -atan2(axis.Z, axis.X); | ||||
} | } | ||||
#if COMPILER_HAS_SSE | |||||
// Helper for MultiplicationSSE: returns w1*cols[0] + w2*cols[1] + w3*cols[2] + w4*cols[3],
// each weight broadcast across all four lanes. The products are accumulated
// in that order so rounding is the same for every output column.
static inline __m128 CombineColumnsSSE(const __m128 (&cols)[4], float w1, float w2, float w3, float w4)
{
	__m128 acc = _mm_mul_ps(_mm_set_ps1(w1), cols[0]);
	acc = _mm_add_ps(_mm_mul_ps(_mm_set_ps1(w2), cols[1]), acc);
	acc = _mm_add_ps(_mm_mul_ps(_mm_set_ps1(w3), cols[2]), acc);
	return _mm_add_ps(_mm_mul_ps(_mm_set_ps1(w4), cols[3]), acc);
}

// SSE implementation of the 4x4 matrix product of `source` and `matrix`.
// Every four-float group result._data2d[j] is a linear combination of the
// four groups of `source`, weighted by the elements _1j.._4j of `matrix`.
// Unaligned loads/stores are used, so no alignment is required of CMatrix3D.
CMatrix3D MultiplicationSSE(const CMatrix3D& source, const CMatrix3D& matrix)
{
	// Load all of `source` up front; the result is built in a separate local.
	const __m128 srcCols[4] = {
		_mm_loadu_ps(source._data2d[0]),
		_mm_loadu_ps(source._data2d[1]),
		_mm_loadu_ps(source._data2d[2]),
		_mm_loadu_ps(source._data2d[3])
	};

	CMatrix3D product;
	_mm_storeu_ps(product._data2d[0],
		CombineColumnsSSE(srcCols, matrix._11, matrix._21, matrix._31, matrix._41));
	_mm_storeu_ps(product._data2d[1],
		CombineColumnsSSE(srcCols, matrix._12, matrix._22, matrix._32, matrix._42));
	_mm_storeu_ps(product._data2d[2],
		CombineColumnsSSE(srcCols, matrix._13, matrix._23, matrix._33, matrix._43));
	_mm_storeu_ps(product._data2d[3],
		CombineColumnsSSE(srcCols, matrix._14, matrix._24, matrix._34, matrix._44));
	return product;
}
// SSE implementation of the accumulating blend: adds m * f onto *s for the
// first three floats of every four-float group _data2d[i], and writes the
// fourth float of each group back bit-for-bit unchanged.
//
// @param s  destination matrix, updated in place (must not be null).
// @param m  matrix whose scaled elements are added.
// @param f  blend weight applied to m.
//
// The previous version multiplied the fourth lane of m by 0.0f and added it,
// which yields NaN when that element of m is Inf/NaN and turns -0.0f into
// +0.0f. AddBlendFallback never writes those elements, so a bitwise lane
// select is used here to keep both implementations in agreement.
// NOTE(review): assumes _data2d[i] aliases {_1i, _2i, _3i, _4i}, as the
// pairing with AddBlendFallback implies - confirm against Matrix3D.h.
void AddBlendSSE(CMatrix3D* s, const CMatrix3D& m, float f)
{
	const __m128 weight = _mm_set_ps1(f);
	// All bits set in the three low lanes, all bits clear in the high lane.
	const __m128 blendLanes = _mm_cmpneq_ps(_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f), _mm_setzero_ps());

	for (int i = 0; i < 4; ++i)
	{
		const __m128 current = _mm_loadu_ps(s->_data2d[i]);
		const __m128 scaled = _mm_mul_ps(_mm_loadu_ps(m._data2d[i]), weight);
		const __m128 summed = _mm_add_ps(scaled, current);
		// Take the sum in the blended lanes and the untouched value elsewhere.
		_mm_storeu_ps(s->_data2d[i],
			_mm_or_ps(_mm_and_ps(blendLanes, summed), _mm_andnot_ps(blendLanes, current)));
	}
}
// SSE implementation of the overwriting blend: sets the first three floats
// of every four-float group _data2d[i] of *s to the matching element of m
// times f, and writes the fourth float of each group back unchanged.
//
// @param s  destination matrix, updated in place (must not be null).
// @param m  matrix supplying the blended elements.
// @param f  blend weight applied to m.
//
// The previous version discarded the old destination by multiplying it by
// 0.0f and kept the fourth lane by multiplying it by 1.0f and adding m * 0.
// With a destination holding NaN/Inf (for example memory that was never
// initialised) NaN * 0 stays NaN and corrupts the result, whereas
// BlendFallback simply overwrites. Bitwise masking makes the selection exact
// regardless of the old contents.
// NOTE(review): assumes _data2d[i] aliases {_1i, _2i, _3i, _4i}, as the
// pairing with BlendFallback implies - confirm against Matrix3D.h.
void BlendSSE(CMatrix3D* s, const CMatrix3D& m, float f)
{
	const __m128 weight = _mm_set_ps1(f);
	// All bits set in the three low lanes, all bits clear in the high lane.
	const __m128 blendLanes = _mm_cmpneq_ps(_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f), _mm_setzero_ps());

	for (int i = 0; i < 4; ++i)
	{
		const __m128 current = _mm_loadu_ps(s->_data2d[i]);
		const __m128 scaled = _mm_mul_ps(_mm_loadu_ps(m._data2d[i]), weight);
		// Blended lanes come from m * f; the remaining lane keeps its old bits.
		_mm_storeu_ps(s->_data2d[i],
			_mm_or_ps(_mm_and_ps(blendLanes, scaled), _mm_andnot_ps(blendLanes, current)));
	}
}
#endif | |||||
// Portable scalar implementation of the 4x4 matrix product.
// Element (i, j) of the result is the dot product of row i of `source`
// with column j of `matrix`, summed left to right.
CMatrix3D MultiplicationFallback(const CMatrix3D& source, const CMatrix3D& matrix)
{
	const CMatrix3D& a = source;
	const CMatrix3D& b = matrix;

	// Row 1
	const float r11 = a._11 * b._11 + a._12 * b._21 + a._13 * b._31 + a._14 * b._41;
	const float r12 = a._11 * b._12 + a._12 * b._22 + a._13 * b._32 + a._14 * b._42;
	const float r13 = a._11 * b._13 + a._12 * b._23 + a._13 * b._33 + a._14 * b._43;
	const float r14 = a._11 * b._14 + a._12 * b._24 + a._13 * b._34 + a._14 * b._44;

	// Row 2
	const float r21 = a._21 * b._11 + a._22 * b._21 + a._23 * b._31 + a._24 * b._41;
	const float r22 = a._21 * b._12 + a._22 * b._22 + a._23 * b._32 + a._24 * b._42;
	const float r23 = a._21 * b._13 + a._22 * b._23 + a._23 * b._33 + a._24 * b._43;
	const float r24 = a._21 * b._14 + a._22 * b._24 + a._23 * b._34 + a._24 * b._44;

	// Row 3
	const float r31 = a._31 * b._11 + a._32 * b._21 + a._33 * b._31 + a._34 * b._41;
	const float r32 = a._31 * b._12 + a._32 * b._22 + a._33 * b._32 + a._34 * b._42;
	const float r33 = a._31 * b._13 + a._32 * b._23 + a._33 * b._33 + a._34 * b._43;
	const float r34 = a._31 * b._14 + a._32 * b._24 + a._33 * b._34 + a._34 * b._44;

	// Row 4
	const float r41 = a._41 * b._11 + a._42 * b._21 + a._43 * b._31 + a._44 * b._41;
	const float r42 = a._41 * b._12 + a._42 * b._22 + a._43 * b._32 + a._44 * b._42;
	const float r43 = a._41 * b._13 + a._42 * b._23 + a._43 * b._33 + a._44 * b._43;
	const float r44 = a._41 * b._14 + a._42 * b._24 + a._43 * b._34 + a._44 * b._44;

	return CMatrix3D(
		r11, r12, r13, r14,
		r21, r22, r23, r24,
		r31, r32, r33, r34,
		r41, r42, r43, r44
	);
}
// Portable scalar blend: overwrites rows 1-3 of *s (all four columns) with
// the matching elements of m scaled by f. Elements _41.._44 are not written.
void BlendFallback(CMatrix3D* s, const CMatrix3D& m, float f)
{
	CMatrix3D& dst = *s;

	// Column 1
	dst._11 = m._11 * f;
	dst._21 = m._21 * f;
	dst._31 = m._31 * f;

	// Column 2
	dst._12 = m._12 * f;
	dst._22 = m._22 * f;
	dst._32 = m._32 * f;

	// Column 3
	dst._13 = m._13 * f;
	dst._23 = m._23 * f;
	dst._33 = m._33 * f;

	// Column 4
	dst._14 = m._14 * f;
	dst._24 = m._24 * f;
	dst._34 = m._34 * f;
}
// Portable scalar accumulating blend: adds m scaled by f onto rows 1-3 of *s
// (all four columns). Elements _41.._44 are not written.
void AddBlendFallback(CMatrix3D* s, const CMatrix3D& m, float f)
{
	CMatrix3D& dst = *s;

	// Column 1
	dst._11 += m._11 * f;
	dst._21 += m._21 * f;
	dst._31 += m._31 * f;

	// Column 2
	dst._12 += m._12 * f;
	dst._22 += m._22 * f;
	dst._32 += m._32 * f;

	// Column 3
	dst._13 += m._13 * f;
	dst._23 += m._23 * f;
	dst._33 += m._33 * f;

	// Column 4
	dst._14 += m._14 * f;
	dst._24 += m._24 * f;
	dst._34 += m._34 * f;
}
CMatrix3D(*CMatrix3D::Multiply)(const CMatrix3D& source, const CMatrix3D& matrix) = MultiplicationFallback; | |||||
void(*CMatrix3D::BlendMat)(CMatrix3D* s, const CMatrix3D& m, float f) = BlendFallback; | |||||
void(*CMatrix3D::AddBlendMat)(CMatrix3D* s, const CMatrix3D& m, float f) = AddBlendFallback; | |||||
// Switches the CMatrix3D dispatch pointers to the SSE kernels when the build
// has SSE support (COMPILER_HAS_SSE) and HostHasSSE() reports it at run time.
// Otherwise the pointers are left as they are.
void Matrix3DActivateFastImpl()
{
#if COMPILER_HAS_SSE
	if (!HostHasSSE())
		return;

	CMatrix3D::Multiply = MultiplicationSSE;
	CMatrix3D::BlendMat = BlendSSE;
	CMatrix3D::AddBlendMat = AddBlendSSE;
#endif
}
Wildfire Games · Phabricator