Vectorise CVector3D arithmetic with SSE intrinsics.

Notes:
- Results are written back with _mm_storeu_ps into a four-float buffer:
  a 128-bit store always writes four lanes, and a plain local float array
  is not guaranteed to be 16-byte aligned.
- Normalize()/Normalized() keep the exact 1.0f / Length() scale and reuse
  the vectorised operator*= / operator*.
- This patch adds no #include; the SSE intrinsics header (<xmmintrin.h>)
  must already be visible in both files.

Index: source/maths/Vector3D.h
===================================================================
--- source/maths/Vector3D.h
+++ source/maths/Vector3D.h
@@ -57,63 +57,119 @@
 
 	CVector3D operator+(const CVector3D& vector) const
 	{
-		return CVector3D(X + vector.X, Y + vector.Y, Z + vector.Z);
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps(0.0f, vector.Z, vector.Y, vector.X);
+		vec = _mm_add_ps(vec, vec2);
+		_mm_storeu_ps(result, vec);
+		return CVector3D(result[0], result[1], result[2]);
 	}
 
 	CVector3D& operator+=(const CVector3D& vector)
 	{
-		X += vector.X;
-		Y += vector.Y;
-		Z += vector.Z;
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps(0.0f, vector.Z, vector.Y, vector.X);
+		vec = _mm_add_ps(vec, vec2);
+		_mm_storeu_ps(result, vec);
+		X = result[0];
+		Y = result[1];
+		Z = result[2];
 		return *this;
 	}
 
 	CVector3D operator-(const CVector3D& vector) const
 	{
-		return CVector3D(X - vector.X, Y - vector.Y, Z - vector.Z);
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps(0.0f, vector.Z, vector.Y, vector.X);
+		vec = _mm_sub_ps(vec, vec2);
+		_mm_storeu_ps(result, vec);
+		return CVector3D(result[0], result[1], result[2]);
 	}
 
 	CVector3D& operator-=(const CVector3D& vector)
 	{
-		X -= vector.X;
-		Y -= vector.Y;
-		Z -= vector.Z;
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps(0.0f, vector.Z, vector.Y, vector.X);
+		vec = _mm_sub_ps(vec, vec2);
+		_mm_storeu_ps(result, vec);
+		X = result[0];
+		Y = result[1];
+		Z = result[2];
 		return *this;
 	}
 
 	CVector3D operator*(float value) const
 	{
-		return CVector3D(X * value, Y * value, Z * value);
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps1(value);
+		vec = _mm_mul_ps(vec, vec2);
+		_mm_storeu_ps(result, vec);
+		return CVector3D(result[0], result[1], result[2]);
 	}
 
 	CVector3D& operator*=(float value)
 	{
-		X *= value;
-		Y *= value;
-		Z *= value;
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps1(value);
+		vec = _mm_mul_ps(vec, vec2);
+		_mm_storeu_ps(result, vec);
+		X = result[0];
+		Y = result[1];
+		Z = result[2];
 		return *this;
 	}
 
 	CVector3D operator-() const
 	{
-		return CVector3D(-X, -Y, -Z);
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		// Flip the sign bits instead of computing 0 - v, so that a zero
+		// component becomes -0.0f just as the scalar -X did.
+		__m128 signMask = _mm_set_ps1(-0.0f);
+		vec = _mm_xor_ps(vec, signMask);
+		_mm_storeu_ps(result, vec);
+		return CVector3D(result[0], result[1], result[2]);
 	}
 
 public:
 	float Dot (const CVector3D &vector) const
 	{
-		return ( X * vector.X +
-				 Y * vector.Y +
-				 Z * vector.Z );
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps(0.0f, vector.Z, vector.Y, vector.X);
+		__m128 temp = _mm_mul_ps(vec, vec2);
+		// Horizontal add: swap neighbouring lanes and add, then fold the
+		// upper pair onto lane 0, giving (x + y) + (z + 0) in lane 0.
+		__m128 shuf = _mm_shuffle_ps(temp, temp, _MM_SHUFFLE(2, 3, 0, 1));
+		__m128 sums = _mm_add_ps(temp, shuf);
+		shuf = _mm_movehl_ps(shuf, sums);
+		sums = _mm_add_ss(sums, shuf);
+		return _mm_cvtss_f32(sums);
 	}
 
 	CVector3D Cross (const CVector3D &vector) const
 	{
-		CVector3D Temp;
-		Temp.X = (Y * vector.Z) - (Z * vector.Y);
-		Temp.Y = (Z * vector.X) - (X * vector.Z);
-		Temp.Z = (X * vector.Y) - (Y * vector.X);
-		return Temp;
+		// Room for all four lanes; storeu needs no 16-byte alignment.
+		float result[4];
+		__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+		__m128 vec2 = _mm_set_ps(0.0f, vector.Z, vector.Y, vector.X);
+		// (Y, Z, X) * (vZ, vX, vY) - (Z, X, Y) * (vY, vZ, vX)
+		__m128 temp = _mm_sub_ps(
+			_mm_mul_ps(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(vec2, vec2, _MM_SHUFFLE(3, 1, 0, 2))),
+			_mm_mul_ps(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(vec2, vec2, _MM_SHUFFLE(3, 0, 2, 1))));
+		_mm_storeu_ps(result, temp);
+		return CVector3D(result[0], result[1], result[2]);
 	}
 
 	float Length () const;
Index: source/maths/Vector3D.cpp
===================================================================
--- source/maths/Vector3D.cpp
+++ source/maths/Vector3D.cpp
@@ -62,7 +62,15 @@
 
 float CVector3D::LengthSquared () const
 {
-	return ( SQR(X) + SQR(Y) + SQR(Z) );
+	__m128 vec = _mm_set_ps(0.0f, Z, Y, X);
+	__m128 temp = _mm_mul_ps(vec, vec);
+	// Horizontal add: swap neighbouring lanes and add, then fold the
+	// upper pair onto lane 0, giving (x*x + y*y) + (z*z + 0) in lane 0.
+	__m128 shuf = _mm_shuffle_ps(temp, temp, _MM_SHUFFLE(2, 3, 0, 1));
+	__m128 sums = _mm_add_ps(temp, shuf);
+	shuf = _mm_movehl_ps(shuf, sums);
+	sums = _mm_add_ss(sums, shuf);
+	return _mm_cvtss_f32(sums);
 }
 
 float CVector3D::Length () const
@@ -70,18 +78,16 @@
 	return sqrtf ( LengthSquared() );
 }
 
 void CVector3D::Normalize ()
 {
-	float scale = 1.0f/Length ();
-
-	X *= scale;
-	Y *= scale;
-	Z *= scale;
+	// Scale by the exact reciprocal length, as the scalar code did.
+	// _mm_rsqrt_ps is only an approximation, so it would not leave the
+	// vector at unit length to full float accuracy.
+	*this *= 1.0f / Length();
 }
 
 CVector3D CVector3D::Normalized () const
 {
-	float scale = 1.0f/Length ();
-
-	return CVector3D(X * scale, Y * scale, Z * scale);
+	// Exact reciprocal length rather than the approximate _mm_rsqrt_ps.
+	return *this * (1.0f / Length());
 }