@OptimusShepard is right: as shown in P149, the generated code is now good (it's possible it wasn't with old MSVC versions). It also seems we've been forcing SSE2 on Windows for a while now, so if your CPU didn't have SSE instructions the game would segfault anyway, because the clean function would have been compiled to SSE instructions as well. So the code wouldn't work either way.
BTW, for those wondering, getting the assembly is very easy in Visual Studio: just add a breakpoint inside the function and right-click on it.
{F1748226, size=full}
Here are the generated functions.
```name=SkinPointsAndNormals,lines=10,lang=cpp
void CModelDef::SkinPointsAndNormals(
size_t numVertices,
const VertexArrayIterator<CVector3D>& Position,
const VertexArrayIterator<CVector3D>& Normal,
const SModelVertex* vertices,
const size_t* blendIndices,
const CMatrix3D newPoseMatrices[])
{
push ebx
push esi
push edi
// To avoid some performance overhead, get the raw vertex array pointers
char* PositionData = Position.GetData();
mov esi,dword ptr [eax]
xor ebx,ebx
size_t PositionStride = Position.GetStride();
mov eax,dword ptr [eax+4]
mov dword ptr [ebp-4],eax
char* NormalData = Normal.GetData();
mov eax,dword ptr [Normal]
char* NormalData = Normal.GetData();
mov edi,dword ptr [eax]
size_t NormalStride = Normal.GetStride();
mov eax,dword ptr [eax+4]
mov dword ptr [Normal],eax
for (size_t j = 0; j < numVertices; ++j)
cmp dword ptr [numVertices],ebx
jbe CModelDef::SkinPointsAndNormals+1D3h (0534CF3h)
// To avoid some performance overhead, get the raw vertex array pointers
char* PositionData = Position.GetData();
mov ecx,dword ptr [vertices]
mov edx,dword ptr [newPoseMatrices]
add ecx,14h
mov dword ptr [Position],ecx
nop word ptr [eax+eax]
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
mov eax,dword ptr [blendIndices]
movss xmm2,dword ptr [ecx-14h]
movss xmm4,dword ptr [ecx-10h]
movaps xmm0,xmm2
movss xmm3,dword ptr [ecx-0Ch]
mov eax,dword ptr [eax+ebx*4]
shl eax,6
// If there was more than one influence, the result is probably not going
// to be of unit length (since it's a weighted sum of several independent
// unit vectors), so we need to normalise it.
// (It's fairly common to only have one influence, so it seems sensible to
// optimise that case a bit.)
if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence
cmp byte ptr [ecx+11h],0FFh
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
mulss xmm0,dword ptr [eax+edx]
movss xmm1,dword ptr [eax+edx+10h]
mulss xmm1,xmm4
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+20h]
mulss xmm0,xmm3
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+14h]
mulss xmm0,xmm4
addss xmm1,dword ptr [eax+edx+30h]
movss dword ptr [pos],xmm1
movss xmm1,dword ptr [eax+edx+4]
mulss xmm1,xmm2
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+24h]
mulss xmm0,xmm3
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+18h]
mulss xmm0,xmm4
CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm);
movss xmm4,dword ptr [ecx-4]
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
addss xmm1,dword ptr [eax+edx+34h]
movss dword ptr [ebp-18h],xmm1
movss xmm1,dword ptr [eax+edx+8]
mulss xmm1,xmm2
CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm);
movss xmm2,dword ptr [ecx-8]
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+28h]
mulss xmm0,xmm3
CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm);
movss xmm3,dword ptr [ecx]
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
addss xmm1,xmm0
CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm);
movaps xmm0,xmm2
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
addss xmm1,dword ptr [eax+edx+38h]
CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm);
mov dword ptr [norm],0
mov dword ptr [ebp-0Ch],0
mov dword ptr [ebp-8],0
mulss xmm0,dword ptr [eax+edx]
{
const SModelVertex& vtx = vertices[j];
CVector3D pos = newPoseMatrices[blendIndices[j]].Transform(vtx.m_Coords);
movss dword ptr [ebp-14h],xmm1
CVector3D norm = newPoseMatrices[blendIndices[j]].Rotate(vtx.m_Norm);
movss xmm1,dword ptr [eax+edx+10h]
mulss xmm1,xmm4
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+20h]
mulss xmm0,xmm3
addss xmm1,xmm0
movss dword ptr [norm],xmm1
movss xmm1,dword ptr [eax+edx+4]
movss xmm0,dword ptr [eax+edx+14h]
mulss xmm0,xmm4
mulss xmm1,xmm2
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+24h]
mulss xmm0,xmm3
addss xmm1,xmm0
movss dword ptr [ebp-0Ch],xmm1
movss xmm0,dword ptr [eax+edx+18h]
movss xmm1,dword ptr [eax+edx+8]
mulss xmm0,xmm4
mulss xmm1,xmm2
addss xmm1,xmm0
movss xmm0,dword ptr [eax+edx+28h]
mulss xmm0,xmm3
addss xmm1,xmm0
movss dword ptr [ebp-8],xmm1
// If there was more than one influence, the result is probably not going
// to be of unit length (since it's a weighted sum of several independent
// unit vectors), so we need to normalise it.
// (It's fairly common to only have one influence, so it seems sensible to
// optimise that case a bit.)
if (vtx.m_Blend.m_Bone[1] != 0xff) // if more than one influence
je CModelDef::SkinPointsAndNormals+19Fh (0534CBFh)
norm.Normalize();
lea ecx,[norm]
call CVector3D::Normalize (0489820h)
mov ecx,dword ptr [ebp+0Ch]
mov edx,dword ptr [newPoseMatrices]
memcpy(PositionData + PositionStride*j, &pos.X, 3*sizeof(float));
movq xmm0,mmword ptr [pos]
inc ebx
mov eax,dword ptr [ebp-14h]
add ecx,38h
movq mmword ptr [esi],xmm0
memcpy(NormalData + NormalStride*j, &norm.X, 3*sizeof(float));
movq xmm0,mmword ptr [norm]
mov dword ptr [esi+8],eax
mov eax,dword ptr [ebp-8]
add esi,dword ptr [PositionStride]
movq mmword ptr [edi],xmm0
mov dword ptr [edi+8],eax
add edi,dword ptr [NormalStride]
mov dword ptr [ebp+0Ch],ecx
cmp ebx,dword ptr [numVertices]
jb CModelDef::SkinPointsAndNormals+40h (0534B60h)
pop edi
}
}
pop esi
pop ebx
mov esp,ebp
pop ebp
ret
```
```name=ConvertRGBColorTo4ub,lines=10,lang=cpp
inline SColor4ub ConvertRGBColorTo4ub(const RGBColor& src)
{
push ebp
mov ebp,esp
SColor4ub result;
result.R = Clamp(static_cast<int>(src.X * 255), 0, 255);
mov edx,dword ptr [src]
movss xmm1,dword ptr [__real@437f0000 (069EDC8h)]
push esi
mov esi,0FFh
movss xmm0,dword ptr [edx]
mulss xmm0,xmm1
SColor4ub result;
result.R = Clamp(static_cast<int>(src.X * 255), 0, 255);
cvttss2si eax,xmm0
test eax,eax
jg ConvertRGBColorTo4ub+28h (04CFA58h)
xor eax,eax
jmp ConvertRGBColorTo4ub+30h (04CFA60h)
cmp eax,0FFh
cmovge eax,esi
result.G = Clamp(static_cast<int>(src.Y * 255), 0, 255);
movss xmm0,dword ptr [edx+4]
mov ecx,dword ptr [ebp+8]
mulss xmm0,xmm1
mov byte ptr [ecx],al
cvttss2si eax,xmm0
test eax,eax
jg ConvertRGBColorTo4ub+4Ah (04CFA7Ah)
xor eax,eax
jmp ConvertRGBColorTo4ub+52h (04CFA82h)
cmp eax,0FFh
cmovge eax,esi
result.B = Clamp(static_cast<int>(src.Z * 255), 0, 255);
movss xmm0,dword ptr [edx+8]
mulss xmm0,xmm1
mov byte ptr [ecx+1],al
cvttss2si eax,xmm0
test eax,eax
jg ConvertRGBColorTo4ub+74h (04CFAA4h)
xor eax,eax
result.A = 255;
mov byte ptr [ecx+3],0FFh
mov byte ptr [ecx+2],al
return result;
mov eax,ecx
pop esi
}
pop ebp
ret
result.B = Clamp(static_cast<int>(src.Z * 255), 0, 255);
cmp eax,0FFh
result.A = 255;
mov byte ptr [ecx+3],0FFh
result.B = Clamp(static_cast<int>(src.Z * 255), 0, 255);
cmovge eax,esi
mov byte ptr [ecx+2],al
return result;
mov eax,ecx
pop esi
}
pop ebp
ret
```