Index: build/premake/premake5.lua =================================================================== --- build/premake/premake5.lua +++ build/premake/premake5.lua @@ -210,6 +210,11 @@ flags { "MultiProcessorCompile" } + -- Windows 7 only supports processors with SSE2, so enable that. + if os.istarget("windows") then + vectorextensions "SSE2" + end + -- use native wchar_t type (not typedef to unsigned short) nativewchar "on" Index: source/graphics/Color.h =================================================================== --- source/graphics/Color.h +++ source/graphics/Color.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2019 Wildfire Games. +/* Copyright (C) 2020 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -24,17 +24,11 @@ // Simple defines for 3 and 4 component floating point colors - just map to // corresponding vector types. -typedef CVector3D RGBColor; -typedef CVector4D RGBAColor; +using RGBColor = CVector3D; +using RGBAColor = CVector4D; // Convert float RGB(A) colors to unsigned byte. -// Exposed as function pointer because it is set at init-time to -// one of several implementations depending on CPU caps. -extern SColor4ub (*ConvertRGBColorTo4ub)(const RGBColor& src); - -// call once ia32_Init has run; detects CPU caps and activates the best -// possible codepath. -extern void ColorActivateFastImpl(); +extern inline SColor4ub ConvertRGBColorTo4ub(const RGBColor& src); class CStr8; Index: source/graphics/Color.cpp =================================================================== --- source/graphics/Color.cpp +++ source/graphics/Color.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2019 Wildfire Games. +/* Copyright (C) 2020 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -24,12 +24,7 @@ #include "ps/CLogger.h" #include "ps/CStr.h" -#if HAVE_SSE -# include -# include "lib/sysdep/arch/x86_x64/x86_x64.h" -#endif - -static SColor4ub fallback_ConvertRGBColorTo4ub(const RGBColor& src) +static inline SColor4ub ConvertRGBColorTo4ub(const RGBColor& src) { SColor4ub result; result.R = Clamp(static_cast(src.X * 255), 0, 255); @@ -39,54 +34,6 @@ return result; } -// on IA32, this is replaced by an SSE assembly version in ia32.cpp -SColor4ub (*ConvertRGBColorTo4ub)(const RGBColor& src) = fallback_ConvertRGBColorTo4ub; - - -// Assembler-optimized function for color conversion -#if HAVE_SSE -static SColor4ub sse_ConvertRGBColorTo4ub(const RGBColor& src) -{ - const __m128 zero = _mm_setzero_ps(); - const __m128 _255 = _mm_set_ss(255.0f); - __m128 r = _mm_load_ss(&src.X); - __m128 g = _mm_load_ss(&src.Y); - __m128 b = _mm_load_ss(&src.Z); - - // C = min(255, 255*max(C, 0)) ( == Clamp(255*C, 0, 255) ) - r = _mm_max_ss(r, zero); - g = _mm_max_ss(g, zero); - b = _mm_max_ss(b, zero); - - r = _mm_mul_ss(r, _255); - g = _mm_mul_ss(g, _255); - b = _mm_mul_ss(b, _255); - - r = _mm_min_ss(r, _255); - g = _mm_min_ss(g, _255); - b = _mm_min_ss(b, _255); - - // convert to integer and combine channels using bit logic - int ri = _mm_cvtss_si32(r); - int gi = _mm_cvtss_si32(g); - int bi = _mm_cvtss_si32(b); - - return SColor4ub(ri, gi, bi, 0xFF); -} -#endif - -void ColorActivateFastImpl() -{ -#if HAVE_SSE - if (x86_x64::Cap(x86_x64::CAP_SSE)) - { - ConvertRGBColorTo4ub = sse_ConvertRGBColorTo4ub; - return; - } -#endif - debug_printf("No SSE available. Slow fallback routines will be used.\n"); -} - /** * Important: This function does not modify the value if parsing fails. */ Index: source/graphics/ModelDef.h =================================================================== --- source/graphics/ModelDef.h +++ source/graphics/ModelDef.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Wildfire Games. +/* Copyright (C) 2020 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -217,19 +217,6 @@ const size_t* blendIndices, const CMatrix3D newPoseMatrices[]); -#if HAVE_SSE - /** - * SSE-optimised version of SkinPointsAndNormals. - */ - static void SkinPointsAndNormals_SSE( - size_t numVertices, - const VertexArrayIterator& Position, - const VertexArrayIterator& Normal, - const SModelVertex* vertices, - const size_t* blendIndices, - const CMatrix3D newPoseMatrices[]); -#endif - /** * Blend bone matrices together to fill bone palette. */ Index: source/graphics/ModelDef.cpp =================================================================== --- source/graphics/ModelDef.cpp +++ source/graphics/ModelDef.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2015 Wildfire Games. +/* Copyright (C) 2020 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify @@ -26,10 +26,6 @@ #include "ps/FileIo.h" #include "maths/Vector4D.h" -#if HAVE_SSE -# include -#endif - CVector3D CModelDef::SkinPoint(const SModelVertex& vtx, const CMatrix3D newPoseMatrices[]) { @@ -121,88 +117,6 @@ } } -#if HAVE_SSE -void CModelDef::SkinPointsAndNormals_SSE( - size_t numVertices, - const VertexArrayIterator& Position, - const VertexArrayIterator& Normal, - const SModelVertex* vertices, - const size_t* blendIndices, - const CMatrix3D newPoseMatrices[]) -{ - // To avoid some performance overhead, get the raw vertex array pointers - char* PositionData = Position.GetData(); - size_t PositionStride = Position.GetStride(); - char* NormalData = Normal.GetData(); - size_t NormalStride = Normal.GetStride(); - - // Must be aligned correctly for SSE - ASSERT((intptr_t)newPoseMatrices % 16 == 0); - ASSERT((intptr_t)PositionData % 16 == 0); - ASSERT((intptr_t)PositionStride % 16 == 0); - ASSERT((intptr_t)NormalData % 16 == 0); - ASSERT((intptr_t)NormalStride % 16 == 0); - - __m128 col0, col1, col2, col3, vec0, vec1, vec2; - - for (size_t j = 0; j < numVertices; ++j) - { - const SModelVertex& vtx = vertices[j]; - const CMatrix3D& mtx = newPoseMatrices[blendIndices[j]]; - - // Loads matrix to xmm registers. - col0 = _mm_load_ps(mtx._data); - col1 = _mm_load_ps(mtx._data + 4); - col2 = _mm_load_ps(mtx._data + 8); - col3 = _mm_load_ps(mtx._data + 12); - - // Loads and computes vertex coordinates. - vec0 = _mm_load1_ps(&vtx.m_Coords.X); // v0 = [x, x, x, x] - vec0 = _mm_mul_ps(col0, vec0); // v0 = [_11*x, _21*x, _31*x, _41*x] - vec1 = _mm_load1_ps(&vtx.m_Coords.Y); // v1 = [y, y, y, y] - vec1 = _mm_mul_ps(col1, vec1); // v1 = [_12*y, _22*y, _32*y, _42*y] - vec0 = _mm_add_ps(vec0, vec1); // v0 = [_11*x + _12*y, ...] - vec1 = _mm_load1_ps(&vtx.m_Coords.Z); // v1 = [z, z, z, z] - vec1 = _mm_mul_ps(col2, vec1); // v1 = [_13*z, _23*z, _33*z, _43*z] - vec1 = _mm_add_ps(vec1, col3); // v1 = [_13*z + _14, ...] - vec0 = _mm_add_ps(vec0, vec1); // v0 = [_11*x + _12*y + _13*z + _14, ...] - _mm_store_ps((float*)(PositionData + PositionStride*j), vec0); - - // Loads and computes normal vectors. - vec0 = _mm_load1_ps(&vtx.m_Norm.X); // v0 = [x, x, x, x] - vec0 = _mm_mul_ps(col0, vec0); // v0 = [_11*x, _21*x, _31*x, _41*x] - vec1 = _mm_load1_ps(&vtx.m_Norm.Y); // v1 = [y, y, y, y] - vec1 = _mm_mul_ps(col1, vec1); // v1 = [_12*y, _22*y, _32*y, _42*y] - vec0 = _mm_add_ps(vec0, vec1); // v0 = [_11*x + _12*y, ...] - vec1 = _mm_load1_ps(&vtx.m_Norm.Z); // v1 = [z, z, z, z] - vec1 = _mm_mul_ps(col2, vec1); // v1 = [_13*z, _23*z, _33*z, _43*z] - vec0 = _mm_add_ps(vec0, vec1); // v0 = [_11*x + _12*y + _13*z, ...] - - // If there was more than one influence, the result is probably not going - // to be of unit length (since it's a weighted sum of several independent - // unit vectors), so we need to normalise it. - // (It's fairly common to only have one influence, so it seems sensible to - // optimise that case a bit.) - if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence - { - // Normalization. - // vec1 = [x*x, y*y, z*z, ?*?] - vec1 = _mm_mul_ps(vec0, vec0); - // vec2 = [y*y, z*z, x*x, ?*?] - vec2 = _mm_shuffle_ps(vec1, vec1, _MM_SHUFFLE(3, 0, 2, 1)); - vec1 = _mm_add_ps(vec1, vec2); - // vec2 = [z*z, x*x, y*y, ?*?] - vec2 = _mm_shuffle_ps(vec2, vec2, _MM_SHUFFLE(3, 0, 2, 1)); - vec1 = _mm_add_ps(vec1, vec2); - // rsqrt(a) = 1 / sqrt(a) - vec1 = _mm_rsqrt_ps(vec1); - vec0 = _mm_mul_ps(vec0, vec1); - } - _mm_store_ps((float*)(NormalData + NormalStride*j), vec0); - } -} -#endif - void CModelDef::BlendBoneMatrices( CMatrix3D boneMatrices[]) { Index: source/graphics/tests/test_Color.h =================================================================== --- source/graphics/tests/test_Color.h +++ source/graphics/tests/test_Color.h @@ -26,7 +26,6 @@ public: void setUp() { - ColorActivateFastImpl(); } void test_Color4ub() Index: source/lib/sysdep/compiler.h =================================================================== --- source/lib/sysdep/compiler.h +++ source/lib/sysdep/compiler.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2019 Wildfire Games. +/* Copyright (c) 2020 Wildfire Games. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the @@ -94,27 +94,4 @@ # endif #endif -// Streaming SIMD Extensions (not supported by all GCC) -// this only ascertains compiler support; use x86_x64::Cap to -// check whether the instructions are supported by the CPU. -#ifndef HAVE_SSE -# if GCC_VERSION && defined(__SSE__) -# define HAVE_SSE 1 -# elif MSC_VERSION // also includes ICC -# define HAVE_SSE 1 -# else -# define HAVE_SSE 0 -# endif -#endif - -#ifndef HAVE_SSE2 -# if GCC_VERSION && defined(__SSE2__) -# define HAVE_SSE2 1 -# elif MSC_VERSION // also includes ICC -# define HAVE_SSE2 1 -# else -# define HAVE_SSE2 0 -# endif -#endif - #endif // #ifndef INCLUDED_COMPILER Index: source/ps/GameSetup/GameSetup.cpp =================================================================== --- source/ps/GameSetup/GameSetup.cpp +++ source/ps/GameSetup/GameSetup.cpp @@ -612,7 +612,6 @@ vp.m_Height = g_yres; g_Renderer.SetViewport(vp); - ColorActivateFastImpl(); ModelRenderer::Init(); } Index: source/renderer/ModelRenderer.cpp =================================================================== --- source/renderer/ModelRenderer.cpp +++ source/renderer/ModelRenderer.cpp @@ -41,23 +41,11 @@ #include "renderer/TimeManager.h" #include "renderer/WaterManager.h" -#if ARCH_X86_X64 -# include "lib/sysdep/arch/x86_x64/x86_x64.h" -#endif - /////////////////////////////////////////////////////////////////////////////////////////////// // ModelRenderer implementation -#if ARCH_X86_X64 -static bool g_EnableSSE = false; -#endif - void ModelRenderer::Init() { -#if ARCH_X86_X64 - if (x86_x64::Cap(x86_x64::CAP_SSE)) - g_EnableSSE = true; -#endif } // Helper function to copy object-space position and normal vectors into arrays. @@ -98,16 +86,7 @@ return; } -#if HAVE_SSE - if (g_EnableSSE) - { - CModelDef::SkinPointsAndNormals_SSE(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices()); - } - else -#endif - { - CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices()); - } + CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices()); } else { @@ -429,7 +408,7 @@ ModelList_t, SMRMaterialBucketKeyHash, std::equal_to, - ProxyAllocator< + ProxyAllocator< std::pair, Allocators::DynamicArena> >;