Index: binaries/data/mods/public/shaders/glsl/model_common.vs =================================================================== --- binaries/data/mods/public/shaders/glsl/model_common.vs +++ binaries/data/mods/public/shaders/glsl/model_common.vs @@ -1,4 +1,5 @@ #version 120 +#extension GL_ARB_draw_instanced : enable uniform mat4 transform; uniform vec3 cameraPos; @@ -12,6 +13,7 @@ uniform vec2 losTransform; uniform mat4 shadowTransform; uniform mat4 instancingTransform; +uniform mat4 instancingTransformReal[32]; #if USE_SHADOW_SAMPLER && USE_SHADOW_PCF uniform vec4 shadowScale; @@ -64,14 +66,12 @@ attribute vec4 a_skinWeights; #endif - vec4 fakeCos(vec4 x) { vec4 tri = abs(fract(x + 0.5) * 2.0 - 1.0); return tri * tri *(3.0 - 2.0 * tri); } - void main() { #if USE_GPU_SKINNING @@ -85,16 +85,16 @@ n += vec3(m * vec4(a_normal, 0.0)) * a_skinWeights[i]; } } - vec4 position = instancingTransform * vec4(p, 1.0); - mat3 normalMatrix = mat3(instancingTransform[0].xyz, instancingTransform[1].xyz, instancingTransform[2].xyz); + vec4 position = instancingTransformReal[gl_InstanceIDARB] * vec4(p, 1.0); + mat3 normalMatrix = mat3(instancingTransformReal[gl_InstanceIDARB][0].xyz, instancingTransformReal[gl_InstanceIDARB][1].xyz, instancingTransformReal[gl_InstanceIDARB][2].xyz); vec3 normal = normalMatrix * normalize(n); #if (USE_NORMAL_MAP || USE_PARALLAX) vec3 tangent = normalMatrix * a_tangent.xyz; #endif #else #if (USE_INSTANCING) - vec4 position = instancingTransform * vec4(a_vertex, 1.0); - mat3 normalMatrix = mat3(instancingTransform[0].xyz, instancingTransform[1].xyz, instancingTransform[2].xyz); + vec4 position = instancingTransformReal[gl_InstanceIDARB] * vec4(a_vertex, 1.0); + mat3 normalMatrix = mat3(instancingTransformReal[gl_InstanceIDARB][0].xyz, instancingTransformReal[gl_InstanceIDARB][1].xyz, instancingTransformReal[gl_InstanceIDARB][2].xyz); vec3 normal = normalMatrix * a_normal; #if (USE_NORMAL_MAP || USE_PARALLAX) vec3 tangent = normalMatrix * a_tangent.xyz; @@ -110,7 +110,7 @@ vec2 wind = windData.xy; // fractional part of model position, clamped to >.4 - vec4 modelPos = instancingTransform[3]; + vec4 modelPos = instancingTransformReal[gl_InstanceIDARB][3]; modelPos = fract(modelPos); modelPos = clamp(modelPos, 0.4, 1.0); @@ -121,7 +121,7 @@ // these determine the speed of the wind's "cosine" waves. cosVec.w = 0.0; cosVec.x = sim_time.x * modelPos[0] + position.x; - cosVec.y = sim_time.x * modelPos[2] / 3.0 + instancingTransform[3][0]; + cosVec.y = sim_time.x * modelPos[2] / 3.0 + instancingTransformReal[gl_InstanceIDARB][3][0]; cosVec.z = sim_time.x * abswind / 4.0 + position.z; // calculate "cosines" in parallel, using a smoothed triangle wave Index: source/graphics/Model.h =================================================================== --- source/graphics/Model.h +++ source/graphics/Model.h @@ -112,6 +112,8 @@ // get the currently playing animation, if any CSkeletonAnim* GetAnimation() const { return m_Anim; } + float GetAnimTime() const { return m_AnimTime; } + // set the animation state to be the same as from another; both models should // be compatible types (same type of skeleton) void CopyAnimationFrom(CModel* source); Index: source/lib/external_libraries/glext_funcs.h =================================================================== --- source/lib/external_libraries/glext_funcs.h +++ source/lib/external_libraries/glext_funcs.h @@ -359,6 +359,9 @@ FUNC2(void, glBindFragDataLocationEXT, glBindFragDataLocation, "3.0", (GLuint program, GLuint colorNumber, const char *name)) FUNC2(GLint, glGetFragDataLocationEXT, glGetFragDataLocation, "3.0", (GLuint program, const char *name)) +// GL_ARB_draw_instanced / GL 3.3 +FUNC2(void, glDrawElementsInstancedARB, glDrawElementsInstanced, "3.3", (GLenum mode, GLsizei count, GLenum type, const void * indices, GLsizei instancecount)) + // GL_ARB_occlusion_query / GL1.5: FUNC2(void, glGenQueriesARB, glGenQueries, "1.5", (GLsizei n, GLuint *ids)) FUNC2(void, glDeleteQueriesARB, glDeleteQueries, "1.5", (GLsizei n, const GLuint *ids)) Index: source/ps/CStrInternStatic.h =================================================================== --- source/ps/CStrInternStatic.h +++ source/ps/CStrInternStatic.h @@ -101,6 +101,7 @@ X(hdr) X(height) X(instancingTransform) +X2(instancingTransformReal, "instancingTransformReal[0]") X(losMap) X(losMatrix) X(losTex) Index: source/ps/GameSetup/GameSetup.cpp =================================================================== --- source/ps/GameSetup/GameSetup.cpp +++ source/ps/GameSetup/GameSetup.cpp @@ -334,6 +334,7 @@ g_Renderer.EndFrame(); PROFILE2_ATTR("draw calls: %d", (int)g_Renderer.GetStats().m_DrawCalls); + PROFILE2_ATTR("saved draw calls: %d", (int)g_Renderer.GetStats().m_SavedDrawCalls); PROFILE2_ATTR("terrain tris: %d", (int)g_Renderer.GetStats().m_TerrainTris); PROFILE2_ATTR("water tris: %d", (int)g_Renderer.GetStats().m_WaterTris); PROFILE2_ATTR("model tris: %d", (int)g_Renderer.GetStats().m_ModelTris); Index: source/renderer/InstancingModelRenderer.h =================================================================== --- source/renderer/InstancingModelRenderer.h +++ source/renderer/InstancingModelRenderer.h @@ -45,7 +45,9 @@ void BeginPass(int streamflags); void EndPass(int streamflags); void PrepareModelDef(const CShaderProgramPtr& shader, int streamflags, const CModelDef& def); + bool CanInstance() { return true; } void RenderModel(const CShaderProgramPtr& shader, int streamflags, CModel* model, CModelRData* data); + void RenderInstancedModel(const CShaderProgramPtr& shader, const std::vector& model); protected: InstancingModelRendererInternals* m; Index: source/renderer/InstancingModelRenderer.cpp =================================================================== --- source/renderer/InstancingModelRenderer.cpp +++ source/renderer/InstancingModelRenderer.cpp @@ -305,6 +305,8 @@ } +static CShaderProgram::Binding instancingTransformBinding; + // Prepare UV coordinates for this modeldef void InstancingModelRenderer::PrepareModelDef(const CShaderProgramPtr& shader, int streamflags, const CModelDef& def) { @@ -343,6 +345,8 @@ shader->VertexAttribPointer(str_a_skinWeights, 4, GL_UNSIGNED_BYTE, GL_TRUE, stride, base + m->imodeldef->m_BlendWeights.offset); } + instancingTransformBinding = shader->GetUniformBinding(str_instancingTransform); + shader->AssertPointersBound(); } @@ -352,6 +356,8 @@ { const CModelDefPtr& mdldef = model->GetModelDef(); + shader->Uniform(instancingTransformBinding, model->GetTransform()); + if (m->gpuSkinning) { // Bind matrices for current animation state. @@ -382,3 +388,61 @@ g_Renderer.m_Stats.m_ModelTris += numFaces; } + +static std::vector uniforms; + +void InstancingModelRenderer::RenderInstancedModel(const CShaderProgramPtr& shader, const std::vector& models) +{ + if (g_Renderer.m_SkipSubmit) + return; + + const CModelDefPtr& mdldef = models.front()->GetModelDef(); + + if (m->gpuSkinning) + { + // HACK: this gives the same animation to all similar modeldefs, + // which is somewhat obviously broken, but it renders something. + + // Bind matrices for current animation state. + // Add 1 to NumBones because of the special 'root' bone. + // HACK: NVIDIA drivers return uniform name with "[0]", Intel Windows drivers without; + // try uploading both names since one of them should work, and this is easier than + // canonicalising the uniform names in CShaderProgramGLSL + shader->Uniform(str_skinBlendMatrices_0, mdldef->GetNumBones() + 1, models.front()->GetAnimatedBoneMatrices()); + shader->Uniform(str_skinBlendMatrices, mdldef->GetNumBones() + 1, models.front()->GetAnimatedBoneMatrices()); + } + + size_t numFaces = mdldef->GetNumFaces(); + + // Set up a uniform + uniforms.reserve(64); + + if (models.size() == 1) + { + shader->Uniform(str_instancingTransformReal, 1, &models[0]->GetTransform()); + glDrawElements(GL_TRIANGLES, + (GLsizei)numFaces*3, + GL_UNSIGNED_SHORT, + m->imodeldefIndexBase); + g_Renderer.m_Stats.m_DrawCalls++; + g_Renderer.m_Stats.m_ModelTris += numFaces; + return; + } + + uniforms.clear(); + + for (CModel* model : models) + uniforms.emplace_back(model->GetTransform()); + + for (size_t start = 0, end = std::min((size_t)32, models.size()); start < models.size(); start += 32, end = std::min(start+32, models.size())) + { + shader->Uniform(str_instancingTransformReal, end-start, uniforms.data() + start); + glDrawElementsInstancedARB(GL_TRIANGLES, + (GLsizei)numFaces*3, + GL_UNSIGNED_SHORT, + m->imodeldefIndexBase, end-start); + g_Renderer.m_Stats.m_DrawCalls++; + g_Renderer.m_Stats.m_SavedDrawCalls += (end-start-1); + g_Renderer.m_Stats.m_ModelTris += numFaces * (end-start); + } +} Index: source/renderer/ModelRenderer.cpp =================================================================== --- source/renderer/ModelRenderer.cpp +++ source/renderer/ModelRenderer.cpp @@ -298,6 +298,11 @@ if (b->GetMaterial().GetDiffuseTexture() < a->GetMaterial().GetDiffuseTexture()) return false; + if (a->GetPlayerID() < b->GetPlayerID()) + return true; + if (b->GetPlayerID() < a->GetPlayerID()) + return false; + return a->GetMaterial().GetStaticUniforms() < b->GetMaterial().GetStaticUniforms(); } }; @@ -600,7 +605,6 @@ { PROFILE3("rendering bucketed submissions"); - size_t idxTechStart = 0; // This vector keeps track of texture changes during rendering. It is kept outside the @@ -645,6 +649,17 @@ m->vertexRenderer->BeginPass(streamflags); + // Uniforms remain valid for the duration of the shader linkage, + // So just assume someone will request them here. + { + CShaderProgram::Binding binding = shader->GetUniformBinding(CStrIntern("sim_time")); + if (binding.Active()) + { + double time = g_Renderer.GetTimeManager().GetGlobalTime(); + shader->Uniform(binding, time, 0.0f, 0.0f, 0.0f); + } + } + // When the shader technique changes, textures need to be // rebound, so ensure there are no remnants from the last pass. // (the vector size is set to 0, but memory is not freed) @@ -652,8 +667,33 @@ texBindings.clear(); texBindingNames.clear(); - CModelDef* currentModeldef = NULL; + CModelDef* currentModeldef = nullptr; CShaderUniforms currentStaticUniforms; + bool rq_water = false; + bool rq_skycube = false; + + CSkeletonAnim* currentAnim = nullptr; + float currentAnimTime = 0.f; + + CColor shadingcolor; + player_id_t playerid = INVALID_PLAYER; + + std::vector keptModels; + keptModels.reserve(64); + + // This must be called before changing state. + auto RenderKeptModels = [this, &shader, &keptModels, &modifier]() + { + if (keptModels.empty()) + return; +#if 0 + printf("Rendered %i %s, anim %p\n", keptModels.size(), keptModels.front()->GetModelDef()->GetName().string8().c_str(), (void*)keptModels.front()->GetAnimation()); +#endif + + modifier->PrepareModel(shader, keptModels.front()); + m->vertexRenderer->RenderInstancedModel(shader, keptModels); + keptModels.clear(); + }; for (size_t idx = idxTechStart; idx < idxTechEnd; ++idx) { @@ -700,6 +740,8 @@ CTexture* newTex = samp.Sampler.get(); if (texBindings[s].Active() && newTex != currentTexs[s]) { + RenderKeptModels(); + shader->BindTexture(texBindings[s], newTex->GetHandle()); currentTexs[s] = newTex; } @@ -709,6 +751,8 @@ CModelDef* newModeldef = model->GetModelDef().get(); if (newModeldef != currentModeldef) { + RenderKeptModels(); + currentModeldef = newModeldef; m->vertexRenderer->PrepareModelDef(shader, streamflags, *currentModeldef); } @@ -717,49 +761,65 @@ CShaderUniforms newStaticUniforms = model->GetMaterial().GetStaticUniforms(); if (newStaticUniforms != currentStaticUniforms) { + RenderKeptModels(); + currentStaticUniforms = newStaticUniforms; currentStaticUniforms.BindUniforms(shader); } const CShaderRenderQueries& renderQueries = model->GetMaterial().GetRenderQueries(); + // For render-queries, we technically don't need to render immediately + // (the state remains valid for pre-existing models) + // (though in all likelihood we'll have changed model and/or technique anyways). for (size_t q = 0; q < renderQueries.GetSize(); ++q) { CShaderRenderQueries::RenderQuery rq = renderQueries.GetItem(q); - if (rq.first == RQUERY_TIME) - { - CShaderProgram::Binding binding = shader->GetUniformBinding(rq.second); - if (binding.Active()) - { - double time = g_Renderer.GetTimeManager().GetGlobalTime(); - shader->Uniform(binding, time, 0.0f, 0.0f, 0.0f); - } - } - else if (rq.first == RQUERY_WATER_TEX) + if (rq.first == RQUERY_WATER_TEX && !rq_water) { + rq_water = true; WaterManager* WaterMgr = g_Renderer.GetWaterManager(); double time = WaterMgr->m_WaterTexTimer; double period = 1.6; int curTex = static_cast(time * 60.0 / period) % 60; - if (WaterMgr->m_RenderWater && WaterMgr->WillRenderFancyWater()) shader->BindTexture(str_waterTex, WaterMgr->m_NormalMap[curTex]); else shader->BindTexture(str_waterTex, g_Renderer.GetTextureManager().GetErrorTexture()); } - else if (rq.first == RQUERY_SKY_CUBE) + else if (rq.first == RQUERY_SKY_CUBE && !rq_skycube) { + rq_skycube = true; shader->BindTexture(str_skyCube, g_Renderer.GetSkyManager()->GetSkyCube()); } } - modifier->PrepareModel(shader, model); + if (model->GetPlayerID() != playerid || model->GetShadingColor() != shadingcolor) + { + RenderKeptModels(); + playerid = model->GetPlayerID(); + shadingcolor = model->GetShadingColor(); + modifier->PrepareModel(shader, model); + } - CModelRData* rdata = static_cast(model->GetRenderData()); - ENSURE(rdata->GetKey() == m->vertexRenderer.get()); + if (model->GetAnimation() && (model->GetAnimation() != currentAnim || + model->GetAnimTime() != currentAnimTime)) + { + RenderKeptModels(); + currentAnim = model->GetAnimation(); + currentAnimTime = model->GetAnimTime(); + } - m->vertexRenderer->RenderModel(shader, streamflags, model, rdata); - } + if (m->vertexRenderer->CanInstance() && g_RenderingOptions.GetPreferGLSL()) + keptModels.push_back(model); + else + { + CModelRData* rdata = static_cast(model->GetRenderData()); + ENSURE(rdata->GetKey() == m->vertexRenderer.get()); + m->vertexRenderer->RenderModel(shader, streamflags, model, rdata); + } + } // numModels loop + RenderKeptModels(); } m->vertexRenderer->EndPass(streamflags); Index: source/renderer/ModelVertexRenderer.h =================================================================== --- source/renderer/ModelVertexRenderer.h +++ source/renderer/ModelVertexRenderer.h @@ -133,6 +133,7 @@ */ virtual void PrepareModelDef(const CShaderProgramPtr& shader, int streamflags, const CModelDef& def) = 0; + virtual bool CanInstance() { return false; } /** * RenderModel: Invoke the rendering commands for the given model. @@ -154,6 +155,7 @@ * succeed. */ virtual void RenderModel(const CShaderProgramPtr& shader, int streamflags, CModel* model, CModelRData* data) = 0; + virtual void RenderInstancedModel(const CShaderProgramPtr& shader, const std::vector& model) {}; }; Index: source/renderer/RenderModifiers.cpp =================================================================== --- source/renderer/RenderModifiers.cpp +++ source/renderer/RenderModifiers.cpp @@ -110,9 +110,6 @@ void ShaderRenderModifier::PrepareModel(const CShaderProgramPtr& shader, CModel* model) { - if (m_BindingInstancingTransform.Active()) - shader->Uniform(m_BindingInstancingTransform, model->GetTransform()); - if (m_BindingShadingColor.Active()) shader->Uniform(m_BindingShadingColor, model->GetShadingColor()); Index: source/renderer/Renderer.h =================================================================== --- source/renderer/Renderer.h +++ source/renderer/Renderer.h @@ -90,6 +90,8 @@ void Reset() { memset(this, 0, sizeof(*this)); } // number of draw calls per frame - total DrawElements + Begin/End immediate mode loops size_t m_DrawCalls; + // Number of saved draw calls via instancing each frame. + size_t m_SavedDrawCalls; // number of terrain triangles drawn size_t m_TerrainTris; // number of water triangles drawn Index: source/renderer/Renderer.cpp =================================================================== --- source/renderer/Renderer.cpp +++ source/renderer/Renderer.cpp @@ -114,6 +114,7 @@ enum { Row_DrawCalls = 0, + Row_SavedDrawCalls, Row_TerrainTris, Row_WaterTris, Row_ModelTris, @@ -171,6 +172,12 @@ sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_DrawCalls); return buf; + case Row_SavedDrawCalls: + if (col == 0) + return "# saved draw calls"; + sprintf_s(buf, sizeof(buf), "%lu", (unsigned long)Stats.m_SavedDrawCalls); + return buf; + case Row_TerrainTris: if (col == 0) return "# terrain tris";