@OptimusShepard is right: as shown in P149, the generated code is now good (it's possible it wasn't with old MSVC versions). It also seems we've been forcing SSE2 on Windows for a while now, so if your CPU didn't have SSE instructions the game would segfault anyway, because the clean function would itself have been compiled to SSE instructions. So the code wouldn't work anyway.
BTW, for those wondering, getting the assembly is very easy in Visual Studio: just set a breakpoint in the function and right-click on it.
Here are the generated functions.
void CModelDef::SkinPointsAndNormals( size_t numVertices, const VertexArrayIterator<CVector3D>& Position, const VertexArrayIterator<CVector3D>& Normal, const SModelVertex* vertices, const size_t* blendIndices, const CMatrix3D newPoseMatrices[]) { push ebx push esi push edi // To avoid some performance overhead, get the raw vertex array pointers char* PositionData = Position.GetData(); mov esi,dword ptr [eax] xor ebx,ebx size_t PositionStride = Position.GetStride(); mov eax,dword ptr [eax+4] mov dword ptr [ebp-4],eax char* NormalData = Normal.GetData(); mov eax,dword ptr [Normal] char* NormalData = Normal.GetData(); mov edi,dword ptr [eax] size_t NormalStride = Normal.GetStride(); mov eax,dword ptr [eax+4] mov dword ptr [Normal],eax for (size_t j = 0; j < numVertices; ++j) cmp dword ptr [numVertices],ebx jbe CModelDef::SkinPointsAndNormals+1D3h (0534CF3h) // To avoid some performance overhead, get the raw vertex array pointers char* PositionData = Position.GetData(); mov ecx,dword ptr [vertices] mov edx,dword ptr [newPoseMatrices] add ecx,14h mov dword ptr [Position],ecx nop word ptr [eax+eax] { const SModelVertex& vtx = vertices[j]; CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); mov eax,dword ptr [blendIndices] movss xmm2,dword ptr [ecx-14h] movss xmm4,dword ptr [ecx-10h] movaps xmm0,xmm2 movss xmm3,dword ptr [ecx-0Ch] mov eax,dword ptr [eax+ebx*4] shl eax,6 // If there was more than one influence, the result is probably not going // to be of unit length (since it's a weighted sum of several independent // unit vectors), so we need to normalise it. // (It's fairly common to only have one influence, so it seems sensible to // optimise that case a bit.) 
if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence cmp byte ptr [ecx+11h],0FFh { const SModelVertex& vtx = vertices[j]; CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); mulss xmm0,dword ptr [eax+edx] movss xmm1,dword ptr [eax+edx+10h] mulss xmm1,xmm4 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+20h] mulss xmm0,xmm3 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+14h] mulss xmm0,xmm4 addss xmm1,dword ptr [eax+edx+30h] movss dword ptr [pos],xmm1 movss xmm1,dword ptr [eax+edx+4] mulss xmm1,xmm2 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+24h] mulss xmm0,xmm3 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+18h] mulss xmm0,xmm4 CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); movss xmm4,dword ptr [ecx-4] { const SModelVertex& vtx = vertices[j]; CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); addss xmm1,dword ptr [eax+edx+34h] movss dword ptr [ebp-18h],xmm1 movss xmm1,dword ptr [eax+edx+8] mulss xmm1,xmm2 CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); movss xmm2,dword ptr [ecx-8] { const SModelVertex& vtx = vertices[j]; CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+28h] mulss xmm0,xmm3 CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); movss xmm3,dword ptr [ecx] { const SModelVertex& vtx = vertices[j]; CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); addss xmm1,xmm0 CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); movaps xmm0,xmm2 { const SModelVertex& vtx = vertices[j]; CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); addss xmm1,dword ptr [eax+edx+38h] CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); mov dword ptr [norm],0 mov dword ptr [ebp-0Ch],0 mov dword ptr [ebp-8],0 mulss xmm0,dword ptr [eax+edx] { const SModelVertex& vtx = vertices[j]; CVector3D pos = 
newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords); movss dword ptr [ebp-14h],xmm1 CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm); movss xmm1,dword ptr [eax+edx+10h] mulss xmm1,xmm4 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+20h] mulss xmm0,xmm3 addss xmm1,xmm0 movss dword ptr [norm],xmm1 movss xmm1,dword ptr [eax+edx+4] movss xmm0,dword ptr [eax+edx+14h] mulss xmm0,xmm4 mulss xmm1,xmm2 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+24h] mulss xmm0,xmm3 addss xmm1,xmm0 movss dword ptr [ebp-0Ch],xmm1 movss xmm0,dword ptr [eax+edx+18h] movss xmm1,dword ptr [eax+edx+8] mulss xmm0,xmm4 mulss xmm1,xmm2 addss xmm1,xmm0 movss xmm0,dword ptr [eax+edx+28h] mulss xmm0,xmm3 addss xmm1,xmm0 movss dword ptr [ebp-8],xmm1 // If there was more than one influence, the result is probably not going // to be of unit length (since it's a weighted sum of several independent // unit vectors), so we need to normalise it. // (It's fairly common to only have one influence, so it seems sensible to // optimise that case a bit.) if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence je CModelDef::SkinPointsAndNormals+19Fh (0534CBFh) norm.Normalize(); lea ecx,[norm] call CVector3D::Normalize (0489820h) mov ecx,dword ptr [ebp+0Ch] mov edx,dword ptr [newPoseMatrices] memcpy(PositionData + PositionStride*j, &pos.X, 3*sizeof(float)); movq xmm0,mmword ptr [pos] inc ebx mov eax,dword ptr [ebp-14h] add ecx,38h movq mmword ptr [esi],xmm0 memcpy(NormalData + NormalStride*j, &norm.X, 3*sizeof(float)); movq xmm0,mmword ptr [norm] mov dword ptr [esi+8],eax mov eax,dword ptr [ebp-8] add esi,dword ptr [PositionStride] movq mmword ptr [edi],xmm0 mov dword ptr [edi+8],eax add edi,dword ptr [NormalStride] mov dword ptr [ebp+0Ch],ecx cmp ebx,dword ptr [numVertices] jb CModelDef::SkinPointsAndNormals+40h (0534B60h) pop edi } } pop esi pop ebx mov esp,ebp pop ebp ret
inline SColor4ub ConvertRGBColorTo4ub(const RGBColor& src) { push ebp mov ebp,esp SColor4ub result; result.R = Clamp(static_cast<int>(src.X * 255), 0, 255); mov edx,dword ptr [src] movss xmm1,dword ptr [__real@437f0000 (069EDC8h)] push esi mov esi,0FFh movss xmm0,dword ptr [edx] mulss xmm0,xmm1 SColor4ub result; result.R = Clamp(static_cast<int>(src.X * 255), 0, 255); cvttss2si eax,xmm0 test eax,eax jg ConvertRGBColorTo4ub+28h (04CFA58h) xor eax,eax jmp ConvertRGBColorTo4ub+30h (04CFA60h) cmp eax,0FFh cmovge eax,esi result.G = Clamp(static_cast<int>(src.Y * 255), 0, 255); movss xmm0,dword ptr [edx+4] mov ecx,dword ptr [ebp+8] mulss xmm0,xmm1 mov byte ptr [ecx],al cvttss2si eax,xmm0 test eax,eax jg ConvertRGBColorTo4ub+4Ah (04CFA7Ah) xor eax,eax jmp ConvertRGBColorTo4ub+52h (04CFA82h) cmp eax,0FFh cmovge eax,esi result.B = Clamp(static_cast<int>(src.Z * 255), 0, 255); movss xmm0,dword ptr [edx+8] mulss xmm0,xmm1 mov byte ptr [ecx+1],al cvttss2si eax,xmm0 test eax,eax jg ConvertRGBColorTo4ub+74h (04CFAA4h) xor eax,eax result.A = 255; mov byte ptr [ecx+3],0FFh mov byte ptr [ecx+2],al return result; mov eax,ecx pop esi } pop ebp ret result.B = Clamp(static_cast<int>(src.Z * 255), 0, 255); cmp eax,0FFh result.A = 255; mov byte ptr [ecx+3],0FFh result.B = Clamp(static_cast<int>(src.Z * 255), 0, 255); cmovge eax,esi mov byte ptr [ecx+2],al return result; mov eax,ecx pop esi } pop ebp ret