Index: ps/trunk/source/ps/CStrInternStatic.h =================================================================== --- ps/trunk/source/ps/CStrInternStatic.h (revision 26815) +++ ps/trunk/source/ps/CStrInternStatic.h (revision 26816) @@ -1,195 +1,192 @@ /* Copyright (C) 2022 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ // This file defines global CStrIntern variables, to avoid the cost of // constructing CStrInterns frequently at runtime. // // A line like // X(foo) // defines a variable str_foo with value "foo". // // A line like // X2(foo_0, "foo[0]") // defines a variable str_foo_0 with value "foo[0]". // For direct inclusion, we presumably just want the extern definitions. #ifndef X #include "CStrIntern.h" #define X(id) extern CStrIntern str_##id; #define X2(id, str) extern CStrIntern str_##id; #endif X(0) X(1) X(2) X(3) X(4) X(ALPHABLEND_PASS_BLEND) X(ALPHABLEND_PASS_OPAQUE) X(BLEND) X(BLOOM_NOP) X(BLOOM_PASS_H) X(BLOOM_PASS_V) X(DECAL) X(DISABLE_RECEIVE_SHADOWS) X(IGNORE_LOS) X(MINIMAP_BASE) X(MINIMAP_LINE) X(MINIMAP_LOS) X(MINIMAP_MASK) X(MINIMAP_POINT) X(MODE_SHADOWCAST) X(MODE_SILHOUETTEDISPLAY) X(MODE_SILHOUETTEOCCLUDER) X(MODE_WIREFRAME) X(MODE_WIREFRAME_SOLID) X(PASS_REFLECTIONS) X(PASS_REFRACTIONS) X(PASS_SHADOWS) X(RENDER_DEBUG_MODE) X(RENDER_DEBUG_MODE_AO) X(RENDER_DEBUG_MODE_ALPHA) X(RENDER_DEBUG_MODE_CUSTOM) X(RENDER_DEBUG_MODE_NONE) X(SHADOWS_CASCADE_COUNT) X(USE_FANCY_EFFECTS) X(USE_FP_SHADOW) X(USE_GPU_SKINNING) X(USE_INSTANCING) X(USE_NORMALS) X(USE_OBJECTCOLOR) X(USE_REAL_DEPTH) X(USE_REFLECTION) X(USE_REFRACTION) X(USE_SHADOW) X(USE_SHADOW_PCF) X(USE_SHADOW_SAMPLER) X(USE_FOG) X(WATERTYPE_CLAP) X(WATERTYPE_LAKE) X2(_emptystring, "") X(a_apexPosition) X(a_otherPosition) X(a_retreatPosition) X(a_skinJoints) X(a_skinWeights) X(a_splashPosition) X(a_tangent) X(a_waterInfo) X(ambient) X(baseTex) X(blendTex) X(bloom) X(blurTex2) X(blurTex4) X(blurTex8) X(brightness) X(cameraForward) X(cameraPos) X(canvas2d) X(color) X(colorAdd) X(colorMul) X(debug_line) X(debug_overlay) X(delta) X(depthTex) X(dummy) X(foamTex) X(fogColor) X(fogParams) X(foreground_overlay) X(grayscaleFactor) X(hdr) X(height) X(instancingTransform) X(losTex) X(losTex1) X(losTex2) X(losTransform) X(los_interp) X(mapSize) X(maskTex) X(maskTextureTransform) X(minimap) X(modelViewMatrix) X(murkiness) X(normalMap) X(normalMap2) X(objectColor) X(overlay_line) X(overlay_solid) X(particle_add) X(particle_multiply) X(particle_overlay) X(particle_solid) X(particle_subtract) X(playerColor) X(projInvTransform) X(qualityLevel) X(reflectionMap) X(reflectionMatrix) X(refractionMap) X(refractionMatrix) X(renderedTex) X(repeatScale) X2(sans_10, "sans-10"); X(saturation) X(screenSize) X(shadingColor) X(shadowDistance) X(shadowDistances) -X2(shadowDistances_0, "shadowDistances[0]") X(shadowScale) X(shadowTex) X(shadowTransform) X(shadowTransforms) -X2(shadowTransforms_0, "shadowTransforms[0]") X(sharpness) X(skinBlendMatrices) -X2(skinBlendMatrices_0, "skinBlendMatrices[0]") X(skyBoxRot) X(skyCube) X(sky_simple) X(solid) X(sunColor) X(sunDir) X(terrain_solid) X(tex) X(texSize) X(textureTransform) X(time) X(tint) X(transform) X(translation) X(viewInvTransform) X(water_high) X(water_simple) X(water_waves) X(waterEffectsTex) X(waterTex) X(waveTex) X(waviness) X(waveParams1) X(waveParams2) X(width) X(windAngle) X(zFar) X(zNear) #undef X #undef X2 Index: ps/trunk/source/renderer/InstancingModelRenderer.cpp =================================================================== --- ps/trunk/source/renderer/InstancingModelRenderer.cpp (revision 26815) +++ ps/trunk/source/renderer/InstancingModelRenderer.cpp (revision 26816) @@ -1,396 +1,392 @@ /* Copyright (C) 2022 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "precompiled.h" #include "renderer/InstancingModelRenderer.h" #include "graphics/Color.h" #include "graphics/LightEnv.h" #include "graphics/Model.h" #include "graphics/ModelDef.h" #include "lib/ogl.h" #include "maths/Vector3D.h" #include "maths/Vector4D.h" #include "ps/CLogger.h" #include "ps/CStrInternStatic.h" #include "renderer/Renderer.h" #include "renderer/RenderModifiers.h" #include "renderer/VertexArray.h" #include "third_party/mikktspace/weldmesh.h" struct IModelDef : public CModelDefRPrivate { /// Static per-CModel vertex array VertexArray m_Array; /// Position and normals are static VertexArray::Attribute m_Position; VertexArray::Attribute m_Normal; VertexArray::Attribute m_Tangent; VertexArray::Attribute m_BlendJoints; // valid iff gpuSkinning == true VertexArray::Attribute m_BlendWeights; // valid iff gpuSkinning == true /// The number of UVs is determined by the model std::vector m_UVs; /// Indices are the same for all models, so share them VertexIndexArray m_IndexArray; IModelDef(const CModelDefPtr& mdef, bool gpuSkinning, bool calculateTangents); }; IModelDef::IModelDef(const CModelDefPtr& mdef, bool gpuSkinning, bool calculateTangents) : m_IndexArray(false), m_Array(Renderer::Backend::GL::CBuffer::Type::VERTEX, false) { size_t numVertices = mdef->GetNumVertices(); m_Position.format = Renderer::Backend::Format::R32G32B32_SFLOAT; m_Array.AddAttribute(&m_Position); m_Normal.format = Renderer::Backend::Format::R32G32B32_SFLOAT; m_Array.AddAttribute(&m_Normal); m_UVs.resize(mdef->GetNumUVsPerVertex()); for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); i++) { m_UVs[i].format = Renderer::Backend::Format::R32G32_SFLOAT; m_Array.AddAttribute(&m_UVs[i]); } if (gpuSkinning) { // We can't use a lot of bones because it costs uniform memory. Recommended // number of bones per model is 32. // Add 1 to NumBones because of the special 'root' bone. if (mdef->GetNumBones() + 1 > 64) LOGERROR("Model '%s' has too many bones %zu/64", mdef->GetName().string8().c_str(), mdef->GetNumBones() + 1); ENSURE(mdef->GetNumBones() + 1 <= 64); m_BlendJoints.format = Renderer::Backend::Format::R8G8B8A8_UINT; m_Array.AddAttribute(&m_BlendJoints); m_BlendWeights.format = Renderer::Backend::Format::R8G8B8A8_UNORM; m_Array.AddAttribute(&m_BlendWeights); } if (calculateTangents) { // Generate tangents for the geometry:- m_Tangent.format = Renderer::Backend::Format::R32G32B32A32_SFLOAT; m_Array.AddAttribute(&m_Tangent); // floats per vertex; position + normal + tangent + UV*sets [+ GPUskinning] int numVertexAttrs = 3 + 3 + 4 + 2 * mdef->GetNumUVsPerVertex(); if (gpuSkinning) { numVertexAttrs += 8; } // the tangent generation can increase the number of vertices temporarily // so reserve a bit more memory to avoid reallocations in GenTangents (in most cases) std::vector newVertices; newVertices.reserve(numVertexAttrs * numVertices * 2); // Generate the tangents ModelRenderer::GenTangents(mdef, newVertices, gpuSkinning); // how many vertices do we have after generating tangents? int newNumVert = newVertices.size() / numVertexAttrs; std::vector remapTable(newNumVert); std::vector vertexDataOut(newNumVert * numVertexAttrs); // re-weld the mesh to remove duplicated vertices int numVertices2 = WeldMesh(&remapTable[0], &vertexDataOut[0], &newVertices[0], newNumVert, numVertexAttrs); // Copy the model data to graphics memory:- m_Array.SetNumberOfVertices(numVertices2); m_Array.Layout(); VertexArrayIterator Position = m_Position.GetIterator(); VertexArrayIterator Normal = m_Normal.GetIterator(); VertexArrayIterator Tangent = m_Tangent.GetIterator(); VertexArrayIterator BlendJoints; VertexArrayIterator BlendWeights; if (gpuSkinning) { BlendJoints = m_BlendJoints.GetIterator(); BlendWeights = m_BlendWeights.GetIterator(); } // copy everything into the vertex array for (int i = 0; i < numVertices2; i++) { int q = numVertexAttrs * i; Position[i] = CVector3D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2]); q += 3; Normal[i] = CVector3D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2]); q += 3; Tangent[i] = CVector4D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2], vertexDataOut[q + 3]); q += 4; if (gpuSkinning) { for (size_t j = 0; j < 4; ++j) { BlendJoints[i][j] = (u8)vertexDataOut[q + 0 + 2 * j]; BlendWeights[i][j] = (u8)vertexDataOut[q + 1 + 2 * j]; } q += 8; } for (size_t j = 0; j < mdef->GetNumUVsPerVertex(); j++) { VertexArrayIterator UVit = m_UVs[j].GetIterator(); UVit[i][0] = vertexDataOut[q + 0 + 2 * j]; UVit[i][1] = vertexDataOut[q + 1 + 2 * j]; } } // upload vertex data m_Array.Upload(); m_Array.FreeBackingStore(); m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces() * 3); m_IndexArray.Layout(); VertexArrayIterator Indices = m_IndexArray.GetIterator(); size_t idxidx = 0; // reindex geometry and upload index for (size_t j = 0; j < mdef->GetNumFaces(); ++j) { Indices[idxidx++] = remapTable[j * 3 + 0]; Indices[idxidx++] = remapTable[j * 3 + 1]; Indices[idxidx++] = remapTable[j * 3 + 2]; } m_IndexArray.Upload(); m_IndexArray.FreeBackingStore(); } else { // Upload model without calculating tangents:- m_Array.SetNumberOfVertices(numVertices); m_Array.Layout(); VertexArrayIterator Position = m_Position.GetIterator(); VertexArrayIterator Normal = m_Normal.GetIterator(); ModelRenderer::CopyPositionAndNormals(mdef, Position, Normal); for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); i++) { VertexArrayIterator UVit = m_UVs[i].GetIterator(); ModelRenderer::BuildUV(mdef, UVit, i); } if (gpuSkinning) { VertexArrayIterator BlendJoints = m_BlendJoints.GetIterator(); VertexArrayIterator BlendWeights = m_BlendWeights.GetIterator(); for (size_t i = 0; i < numVertices; ++i) { const SModelVertex& vtx = mdef->GetVertices()[i]; for (size_t j = 0; j < 4; ++j) { BlendJoints[i][j] = vtx.m_Blend.m_Bone[j]; BlendWeights[i][j] = (u8)(255.f * vtx.m_Blend.m_Weight[j]); } } } m_Array.Upload(); m_Array.FreeBackingStore(); m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces()*3); m_IndexArray.Layout(); ModelRenderer::BuildIndices(mdef, m_IndexArray.GetIterator()); m_IndexArray.Upload(); m_IndexArray.FreeBackingStore(); } } struct InstancingModelRendererInternals { bool gpuSkinning; bool calculateTangents; /// Previously prepared modeldef IModelDef* imodeldef; /// Index base for imodeldef u8* imodeldefIndexBase; }; // Construction and Destruction InstancingModelRenderer::InstancingModelRenderer(bool gpuSkinning, bool calculateTangents) { m = new InstancingModelRendererInternals; m->gpuSkinning = gpuSkinning; m->calculateTangents = calculateTangents; m->imodeldef = 0; } InstancingModelRenderer::~InstancingModelRenderer() { delete m; } // Build modeldef data if necessary - we have no per-CModel data CModelRData* InstancingModelRenderer::CreateModelData(const void* key, CModel* model) { CModelDefPtr mdef = model->GetModelDef(); IModelDef* imodeldef = (IModelDef*)mdef->GetRenderData(m); if (m->gpuSkinning) ENSURE(model->IsSkinned()); else ENSURE(!model->IsSkinned()); if (!imodeldef) { imodeldef = new IModelDef(mdef, m->gpuSkinning, m->calculateTangents); mdef->SetRenderData(m, imodeldef); } return new CModelRData(key); } void InstancingModelRenderer::UpdateModelData(CModel* UNUSED(model), CModelRData* UNUSED(data), int UNUSED(updateflags)) { // We have no per-CModel data } // Setup one rendering pass. void InstancingModelRenderer::BeginPass() { } // Cleanup rendering pass. void InstancingModelRenderer::EndPass( Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext)) { } // Prepare UV coordinates for this modeldef void InstancingModelRenderer::PrepareModelDef( Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, Renderer::Backend::GL::CShaderProgram* UNUSED(shader), const CModelDef& def) { m->imodeldef = (IModelDef*)def.GetRenderData(m); ENSURE(m->imodeldef); m->imodeldef->m_Array.UploadIfNeeded(deviceCommandContext); m->imodeldef->m_IndexArray.UploadIfNeeded(deviceCommandContext); deviceCommandContext->SetIndexBuffer(m->imodeldef->m_IndexArray.GetBuffer()); const uint32_t stride = m->imodeldef->m_Array.GetStride(); const uint32_t firstVertexOffset = m->imodeldef->m_Array.GetOffset() * stride; deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::POSITION, m->imodeldef->m_Position.format, firstVertexOffset + m->imodeldef->m_Position.offset, stride, 0); deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::NORMAL, m->imodeldef->m_Normal.format, firstVertexOffset + m->imodeldef->m_Normal.offset, stride, 0); constexpr size_t MAX_UV = 2; for (size_t uv = 0; uv < std::min(MAX_UV, def.GetNumUVsPerVertex()); ++uv) { const Renderer::Backend::VertexAttributeStream stream = static_cast( static_cast(Renderer::Backend::VertexAttributeStream::UV0) + uv); deviceCommandContext->SetVertexAttributeFormat( stream, m->imodeldef->m_UVs[uv].format, firstVertexOffset + m->imodeldef->m_UVs[uv].offset, stride, 0); } // GPU skinning requires extra attributes to compute positions/normals. if (m->gpuSkinning) { deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::UV2, m->imodeldef->m_BlendJoints.format, firstVertexOffset + m->imodeldef->m_BlendJoints.offset, stride, 0); deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::UV3, m->imodeldef->m_BlendWeights.format, firstVertexOffset + m->imodeldef->m_BlendWeights.offset, stride, 0); } if (m->calculateTangents) { deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::UV4, m->imodeldef->m_Tangent.format, firstVertexOffset + m->imodeldef->m_Tangent.offset, stride, 0); } deviceCommandContext->SetVertexBuffer(0, m->imodeldef->m_Array.GetBuffer()); } // Render one model void InstancingModelRenderer::RenderModel( Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, Renderer::Backend::GL::CShaderProgram* shader, CModel* model, CModelRData* UNUSED(data)) { const CModelDefPtr& mdldef = model->GetModelDef(); if (m->gpuSkinning) { // Bind matrices for current animation state. // Add 1 to NumBones because of the special 'root' bone. - // HACK: NVIDIA drivers return uniform name with "[0]", Intel Windows drivers without; - // try uploading both names since one of them should work, and this is easier than - // canonicalising the uniform names in CShaderProgramGLSL - shader->Uniform(str_skinBlendMatrices_0, mdldef->GetNumBones() + 1, model->GetAnimatedBoneMatrices()); shader->Uniform(str_skinBlendMatrices, mdldef->GetNumBones() + 1, model->GetAnimatedBoneMatrices()); } // Render the lot. const size_t numberOfFaces = mdldef->GetNumFaces(); deviceCommandContext->DrawIndexedInRange( m->imodeldef->m_IndexArray.GetOffset(), numberOfFaces * 3, 0, m->imodeldef->m_Array.GetNumberOfVertices() - 1); // Bump stats. g_Renderer.m_Stats.m_DrawCalls++; g_Renderer.m_Stats.m_ModelTris += numberOfFaces; } Index: ps/trunk/source/renderer/ShadowMap.cpp =================================================================== --- ps/trunk/source/renderer/ShadowMap.cpp (revision 26815) +++ ps/trunk/source/renderer/ShadowMap.cpp (revision 26816) @@ -1,781 +1,779 @@ /* Copyright (C) 2022 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "precompiled.h" #include "ShadowMap.h" #include "graphics/Camera.h" #include "graphics/LightEnv.h" #include "graphics/ShaderManager.h" #include "gui/GUIMatrix.h" #include "lib/bits.h" #include "lib/ogl.h" #include "maths/BoundingBoxAligned.h" #include "maths/Brush.h" #include "maths/Frustum.h" #include "maths/MathUtil.h" #include "maths/Matrix3D.h" #include "ps/CLogger.h" #include "ps/ConfigDB.h" #include "ps/CStrInternStatic.h" #include "ps/Profile.h" #include "ps/VideoMode.h" #include "renderer/backend/gl/Device.h" #include "renderer/backend/gl/Texture.h" #include "renderer/DebugRenderer.h" #include "renderer/Renderer.h" #include "renderer/RenderingOptions.h" #include "renderer/SceneRenderer.h" #include namespace { constexpr int MAX_CASCADE_COUNT = 4; constexpr float DEFAULT_SHADOWS_CUTOFF_DISTANCE = 300.0f; constexpr float DEFAULT_CASCADE_DISTANCE_RATIO = 1.7f; } // anonymous namespace /** * Struct ShadowMapInternals: Internal data for the ShadowMap implementation */ struct ShadowMapInternals { std::unique_ptr Framebuffer; std::unique_ptr Texture; // bit depth for the depth texture int DepthTextureBits; // width, height of shadow map int Width, Height; // Shadow map quality (-1 - Low, 0 - Medium, 1 - High, 2 - Very High) int QualityLevel; // used width, height of shadow map int EffectiveWidth, EffectiveHeight; // Transform world space into light space; calculated on SetupFrame CMatrix3D LightTransform; // transform light space into world space CMatrix3D InvLightTransform; CBoundingBoxAligned ShadowReceiverBound; int CascadeCount; float CascadeDistanceRatio; float ShadowsCutoffDistance; bool ShadowsCoverMap; struct Cascade { // transform light space into projected light space // in projected light space, the shadowbound box occupies the [-1..1] cube // calculated on BeginRender, after the final shadow bounds are known CMatrix3D LightProjection; float Distance; CBoundingBoxAligned FrustumBBAA; CBoundingBoxAligned ConvexBounds; CBoundingBoxAligned ShadowRenderBound; // Bounding box of shadowed objects in the light space. CBoundingBoxAligned ShadowCasterBound; // Transform world space into texture space of the shadow map; // calculated on BeginRender, after the final shadow bounds are known CMatrix3D TextureMatrix; // View port of the shadow texture where the cascade should be rendered. SViewPort ViewPort; }; std::array Cascades; // Camera transformed into light space CCamera LightspaceCamera; // Some drivers (at least some Intel Mesa ones) appear to handle alpha testing // incorrectly when the FBO has only a depth attachment. // When m_ShadowAlphaFix is true, we use DummyTexture to store a useless // alpha texture which is attached to the FBO as a workaround. std::unique_ptr DummyTexture; // Copy of renderer's standard view camera, saved between // BeginRender and EndRender while we replace it with the shadow camera CCamera SavedViewCamera; void CalculateShadowMatrices(const int cascade); void CreateTexture(); void UpdateCascadesParameters(); }; void ShadowMapInternals::UpdateCascadesParameters() { CascadeCount = 1; CFG_GET_VAL("shadowscascadecount", CascadeCount); if (CascadeCount < 1 || CascadeCount > MAX_CASCADE_COUNT || g_VideoMode.GetBackend() == CVideoMode::Backend::GL_ARB) CascadeCount = 1; ShadowsCoverMap = false; CFG_GET_VAL("shadowscovermap", ShadowsCoverMap); } void CalculateBoundsForCascade( const CCamera& camera, const CMatrix3D& lightTransform, const float nearPlane, const float farPlane, CBoundingBoxAligned* bbaa, CBoundingBoxAligned* frustumBBAA) { frustumBBAA->SetEmpty(); // We need to calculate a circumscribed sphere for the camera to // create a rotation stable bounding box. const CVector3D cameraIn = camera.m_Orientation.GetIn(); const CVector3D cameraTranslation = camera.m_Orientation.GetTranslation(); const CVector3D centerNear = cameraTranslation + cameraIn * nearPlane; const CVector3D centerDist = cameraTranslation + cameraIn * farPlane; // We can solve 3D problem in 2D space, because the frustum is // symmetric by 2 planes. Than means we can use only one corner // to find a circumscribed sphere. CCamera::Quad corners; camera.GetViewQuad(nearPlane, corners); for (CVector3D& corner : corners) corner = camera.GetOrientation().Transform(corner); const CVector3D cornerNear = corners[0]; for (const CVector3D& corner : corners) *frustumBBAA += lightTransform.Transform(corner); camera.GetViewQuad(farPlane, corners); for (CVector3D& corner : corners) corner = camera.GetOrientation().Transform(corner); const CVector3D cornerDist = corners[0]; for (const CVector3D& corner : corners) *frustumBBAA += lightTransform.Transform(corner); // We solve 2D case for the right trapezoid. const float firstBase = (cornerNear - centerNear).Length(); const float secondBase = (cornerDist - centerDist).Length(); const float height = (centerDist - centerNear).Length(); const float distanceToCenter = (height * height + secondBase * secondBase - firstBase * firstBase) * 0.5f / height; CVector3D position = cameraTranslation + cameraIn * (nearPlane + distanceToCenter); const float radius = (cornerNear - position).Length(); // We need to convert the bounding box to the light space. position = lightTransform.Rotate(position); const float insets = 0.2f; *bbaa = CBoundingBoxAligned(position, position); bbaa->Expand(radius); bbaa->Expand(insets); } ShadowMap::ShadowMap() { m = new ShadowMapInternals; m->Framebuffer = 0; m->Width = 0; m->Height = 0; m->QualityLevel = 0; m->EffectiveWidth = 0; m->EffectiveHeight = 0; m->DepthTextureBits = 0; // DepthTextureBits: 24/32 are very much faster than 16, on GeForce 4 and FX; // but they're very much slower on Radeon 9800. // In both cases, the default (no specified depth) is fast, so we just use // that by default and hope it's alright. (Otherwise, we'd probably need to // do some kind of hardware detection to work out what to use.) // Avoid using uninitialised values in AddShadowedBound if SetupFrame wasn't called first m->LightTransform.SetIdentity(); m->UpdateCascadesParameters(); } ShadowMap::~ShadowMap() { m->Framebuffer.reset(); m->Texture.reset(); m->DummyTexture.reset(); delete m; } // Force the texture/buffer/etc to be recreated, particularly when the renderer's // size has changed void ShadowMap::RecreateTexture() { m->Framebuffer.reset(); m->Texture.reset(); m->DummyTexture.reset(); m->UpdateCascadesParameters(); // (Texture will be constructed in next SetupFrame) } // SetupFrame: camera and light direction for this frame void ShadowMap::SetupFrame(const CCamera& camera, const CVector3D& lightdir) { if (!m->Texture) m->CreateTexture(); CVector3D x(0, 1, 0), eyepos; CVector3D z = lightdir; z.Normalize(); x -= z * z.Dot(x); if (x.Length() < 0.001) { // this is invoked if the camera and light directions almost coincide // assumption: light direction has a significant Z component x = CVector3D(1.0, 0.0, 0.0); x -= z * z.Dot(x); } x.Normalize(); CVector3D y = z.Cross(x); // X axis perpendicular to light direction, flowing along with view direction m->LightTransform._11 = x.X; m->LightTransform._12 = x.Y; m->LightTransform._13 = x.Z; // Y axis perpendicular to light and view direction m->LightTransform._21 = y.X; m->LightTransform._22 = y.Y; m->LightTransform._23 = y.Z; // Z axis is in direction of light m->LightTransform._31 = z.X; m->LightTransform._32 = z.Y; m->LightTransform._33 = z.Z; // eye is at the origin of the coordinate system m->LightTransform._14 = -x.Dot(eyepos); m->LightTransform._24 = -y.Dot(eyepos); m->LightTransform._34 = -z.Dot(eyepos); m->LightTransform._41 = 0.0; m->LightTransform._42 = 0.0; m->LightTransform._43 = 0.0; m->LightTransform._44 = 1.0; m->LightTransform.GetInverse(m->InvLightTransform); m->ShadowReceiverBound.SetEmpty(); m->LightspaceCamera = camera; m->LightspaceCamera.m_Orientation = m->LightTransform * camera.m_Orientation; m->LightspaceCamera.UpdateFrustum(); m->ShadowsCutoffDistance = DEFAULT_SHADOWS_CUTOFF_DISTANCE; m->CascadeDistanceRatio = DEFAULT_CASCADE_DISTANCE_RATIO; CFG_GET_VAL("shadowscutoffdistance", m->ShadowsCutoffDistance); CFG_GET_VAL("shadowscascadedistanceratio", m->CascadeDistanceRatio); m->CascadeDistanceRatio = Clamp(m->CascadeDistanceRatio, 1.1f, 16.0f); m->Cascades[GetCascadeCount() - 1].Distance = m->ShadowsCutoffDistance; for (int cascade = GetCascadeCount() - 2; cascade >= 0; --cascade) m->Cascades[cascade].Distance = m->Cascades[cascade + 1].Distance / m->CascadeDistanceRatio; if (GetCascadeCount() == 1 || m->ShadowsCoverMap) { m->Cascades[0].ViewPort = SViewPort{1, 1, m->EffectiveWidth - 2, m->EffectiveHeight - 2}; if (m->ShadowsCoverMap) m->Cascades[0].Distance = camera.GetFarPlane(); } else { for (int cascade = 0; cascade < GetCascadeCount(); ++cascade) { const int offsetX = (cascade & 0x1) ? m->EffectiveWidth / 2 : 0; const int offsetY = (cascade & 0x2) ? m->EffectiveHeight / 2 : 0; m->Cascades[cascade].ViewPort = SViewPort{offsetX + 1, offsetY + 1, m->EffectiveWidth / 2 - 2, m->EffectiveHeight / 2 - 2}; } } for (int cascadeIdx = 0; cascadeIdx < GetCascadeCount(); ++cascadeIdx) { ShadowMapInternals::Cascade& cascade = m->Cascades[cascadeIdx]; const float nearPlane = cascadeIdx > 0 ? m->Cascades[cascadeIdx - 1].Distance : camera.GetNearPlane(); const float farPlane = cascade.Distance; CalculateBoundsForCascade(camera, m->LightTransform, nearPlane, farPlane, &cascade.ConvexBounds, &cascade.FrustumBBAA); cascade.ShadowCasterBound.SetEmpty(); } } // AddShadowedBound: add a world-space bounding box to the bounds of shadowed // objects void ShadowMap::AddShadowCasterBound(const int cascade, const CBoundingBoxAligned& bounds) { CBoundingBoxAligned lightspacebounds; bounds.Transform(m->LightTransform, lightspacebounds); m->Cascades[cascade].ShadowCasterBound += lightspacebounds; } void ShadowMap::AddShadowReceiverBound(const CBoundingBoxAligned& bounds) { CBoundingBoxAligned lightspacebounds; bounds.Transform(m->LightTransform, lightspacebounds); m->ShadowReceiverBound += lightspacebounds; } CFrustum ShadowMap::GetShadowCasterCullFrustum(const int cascade) { // Get the bounds of all objects that can receive shadows CBoundingBoxAligned bound = m->ShadowReceiverBound; // Intersect with the camera frustum, so the shadow map doesn't have to get // stretched to cover the off-screen parts of large models bound.IntersectFrustumConservative(m->Cascades[cascade].FrustumBBAA.ToFrustum()); // ShadowBound might have been empty to begin with, producing an empty result if (bound.IsEmpty()) { // CFrustum can't easily represent nothingness, so approximate it with // a single point which won't match many objects bound += CVector3D(0.0f, 0.0f, 0.0f); return bound.ToFrustum(); } // Extend the bounds a long way towards the light source, to encompass // all objects that might cast visible shadows. // (The exact constant was picked entirely arbitrarily.) bound[0].Z -= 1000.f; CFrustum frustum = bound.ToFrustum(); frustum.Transform(m->InvLightTransform); return frustum; } // CalculateShadowMatrices: calculate required matrices for shadow map generation - the light's // projection and transformation matrices void ShadowMapInternals::CalculateShadowMatrices(const int cascade) { CBoundingBoxAligned& shadowRenderBound = Cascades[cascade].ShadowRenderBound; shadowRenderBound = Cascades[cascade].ConvexBounds; if (ShadowsCoverMap) { // Start building the shadow map to cover all objects that will receive shadows CBoundingBoxAligned receiverBound = ShadowReceiverBound; // Intersect with the camera frustum, so the shadow map doesn't have to get // stretched to cover the off-screen parts of large models receiverBound.IntersectFrustumConservative(LightspaceCamera.GetFrustum()); // Intersect with the shadow caster bounds, because there's no point // wasting space around the edges of the shadow map that we're not going // to draw into shadowRenderBound[0].X = std::max(receiverBound[0].X, Cascades[cascade].ShadowCasterBound[0].X); shadowRenderBound[0].Y = std::max(receiverBound[0].Y, Cascades[cascade].ShadowCasterBound[0].Y); shadowRenderBound[1].X = std::min(receiverBound[1].X, Cascades[cascade].ShadowCasterBound[1].X); shadowRenderBound[1].Y = std::min(receiverBound[1].Y, Cascades[cascade].ShadowCasterBound[1].Y); } else if (CascadeCount > 1) { // We need to offset the cascade to its place on the texture. const CVector3D size = (shadowRenderBound[1] - shadowRenderBound[0]) * 0.5f; if (!(cascade & 0x1)) shadowRenderBound[1].X += size.X * 2.0f; else shadowRenderBound[0].X -= size.X * 2.0f; if (!(cascade & 0x2)) shadowRenderBound[1].Y += size.Y * 2.0f; else shadowRenderBound[0].Y -= size.Y * 2.0f; } // Set the near and far planes to include just the shadow casters, // so we make full use of the depth texture's range. Add a bit of a // delta so we don't accidentally clip objects that are directly on // the planes. shadowRenderBound[0].Z = Cascades[cascade].ShadowCasterBound[0].Z - 2.f; shadowRenderBound[1].Z = Cascades[cascade].ShadowCasterBound[1].Z + 2.f; // Setup orthogonal projection (lightspace -> clip space) for shadowmap rendering CVector3D scale = shadowRenderBound[1] - shadowRenderBound[0]; CVector3D shift = (shadowRenderBound[1] + shadowRenderBound[0]) * -0.5; if (scale.X < 1.0) scale.X = 1.0; if (scale.Y < 1.0) scale.Y = 1.0; if (scale.Z < 1.0) scale.Z = 1.0; scale.X = 2.0 / scale.X; scale.Y = 2.0 / scale.Y; scale.Z = 2.0 / scale.Z; // make sure a given world position falls on a consistent shadowmap texel fractional offset float offsetX = fmod(shadowRenderBound[0].X - LightTransform._14, 2.0f/(scale.X*EffectiveWidth)); float offsetY = fmod(shadowRenderBound[0].Y - LightTransform._24, 2.0f/(scale.Y*EffectiveHeight)); CMatrix3D& lightProjection = Cascades[cascade].LightProjection; lightProjection.SetZero(); lightProjection._11 = scale.X; lightProjection._14 = (shift.X + offsetX) * scale.X; lightProjection._22 = scale.Y; lightProjection._24 = (shift.Y + offsetY) * scale.Y; lightProjection._33 = scale.Z; lightProjection._34 = shift.Z * scale.Z; lightProjection._44 = 1.0; // Calculate texture matrix by creating the clip space to texture coordinate matrix // and then concatenating all matrices that have been calculated so far float texscalex = scale.X * 0.5f * (float)EffectiveWidth / (float)Width; float texscaley = scale.Y * 0.5f * (float)EffectiveHeight / (float)Height; float texscalez = scale.Z * 0.5f; CMatrix3D lightToTex; lightToTex.SetZero(); lightToTex._11 = texscalex; lightToTex._14 = (offsetX - shadowRenderBound[0].X) * texscalex; lightToTex._22 = texscaley; lightToTex._24 = (offsetY - shadowRenderBound[0].Y) * texscaley; lightToTex._33 = texscalez; lightToTex._34 = -shadowRenderBound[0].Z * texscalez; lightToTex._44 = 1.0; Cascades[cascade].TextureMatrix = lightToTex * LightTransform; } // Create the shadow map void ShadowMapInternals::CreateTexture() { // Cleanup Framebuffer.reset(); Texture.reset(); DummyTexture.reset(); Renderer::Backend::GL::CDevice* backendDevice = g_VideoMode.GetBackendDevice(); CFG_GET_VAL("shadowquality", QualityLevel); // Get shadow map size as next power of two up from view width/height. int shadowMapSize; switch (QualityLevel) { // Low case -1: shadowMapSize = 512; break; // High case 1: shadowMapSize = 2048; break; // Ultra case 2: shadowMapSize = std::max(round_up_to_pow2(std::max(g_Renderer.GetWidth(), g_Renderer.GetHeight())) * 4, 4096); break; // Medium as is default: shadowMapSize = 1024; break; } // Clamp to the maximum texture size. shadowMapSize = std::min( shadowMapSize, static_cast(backendDevice->GetCapabilities().maxTextureSize)); Width = Height = shadowMapSize; // Since we're using a framebuffer object, the whole texture is available EffectiveWidth = Width; EffectiveHeight = Height; const char* formatName; Renderer::Backend::Format backendFormat = Renderer::Backend::Format::UNDEFINED; #if CONFIG2_GLES formatName = "DEPTH_COMPONENT"; backendFormat = Renderer::Backend::Format::D24; #else switch (DepthTextureBits) { case 16: formatName = "Format::D16"; backendFormat = Renderer::Backend::Format::D16; break; case 24: formatName = "Format::D24"; backendFormat = Renderer::Backend::Format::D24; break; case 32: formatName = "Format::D32"; backendFormat = Renderer::Backend::Format::D32; break; default: formatName = "Format::D24"; backendFormat = Renderer::Backend::Format::D24; break; } #endif ENSURE(formatName); LOGMESSAGE("Creating shadow texture (size %dx%d) (format = %s)", Width, Height, formatName); if (g_RenderingOptions.GetShadowAlphaFix()) { DummyTexture = backendDevice->CreateTexture2D("ShadowMapDummy", Renderer::Backend::Format::R8G8B8A8_UNORM, Width, Height, Renderer::Backend::Sampler::MakeDefaultSampler( Renderer::Backend::Sampler::Filter::NEAREST, Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE)); } Renderer::Backend::Sampler::Desc samplerDesc = Renderer::Backend::Sampler::MakeDefaultSampler( #if CONFIG2_GLES // GLES doesn't do depth comparisons, so treat it as a // basic unfiltered depth texture Renderer::Backend::Sampler::Filter::NEAREST, #else // Use LINEAR to trigger automatic PCF on some devices. Renderer::Backend::Sampler::Filter::LINEAR, #endif Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE); // Enable automatic depth comparisons samplerDesc.compareEnabled = true; samplerDesc.compareOp = Renderer::Backend::CompareOp::LESS_OR_EQUAL; Texture = backendDevice->CreateTexture2D("ShadowMapDepth", backendFormat, Width, Height, samplerDesc); Framebuffer = backendDevice->CreateFramebuffer("ShadowMapFramebuffer", g_RenderingOptions.GetShadowAlphaFix() ? DummyTexture.get() : nullptr, Texture.get()); if (!Framebuffer) { LOGERROR("Failed to create shadows framebuffer"); // Disable shadow rendering (but let the user try again if they want). g_RenderingOptions.SetShadows(false); } } // Set up to render into shadow map texture void ShadowMap::BeginRender() { Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext = g_Renderer.GetDeviceCommandContext(); deviceCommandContext->SetGraphicsPipelineState( Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc()); { PROFILE("bind framebuffer"); ENSURE(m->Framebuffer); deviceCommandContext->SetFramebuffer(m->Framebuffer.get()); } // clear buffers { PROFILE("clear depth texture"); // In case we used m_ShadowAlphaFix, we ought to clear the unused // color buffer too, else Mali 400 drivers get confused. // Might as well clear stencil too for completeness. deviceCommandContext->ClearFramebuffer(); } m->SavedViewCamera = g_Renderer.GetSceneRenderer().GetViewCamera(); } void ShadowMap::PrepareCamera(const int cascade) { m->CalculateShadowMatrices(cascade); const SViewPort vp = { 0, 0, m->EffectiveWidth, m->EffectiveHeight }; g_Renderer.SetViewport(vp); CCamera camera = m->SavedViewCamera; camera.SetProjection(m->Cascades[cascade].LightProjection); camera.GetOrientation() = m->InvLightTransform; g_Renderer.GetSceneRenderer().SetViewCamera(camera); const SViewPort& cascadeViewPort = m->Cascades[cascade].ViewPort; Renderer::Backend::GL::CDeviceCommandContext::Rect scissorRect; scissorRect.x = cascadeViewPort.m_X; scissorRect.y = cascadeViewPort.m_Y; scissorRect.width = cascadeViewPort.m_Width; scissorRect.height = cascadeViewPort.m_Height; g_Renderer.GetDeviceCommandContext()->SetScissors(1, &scissorRect); } // Finish rendering into shadow map texture void ShadowMap::EndRender() { g_Renderer.GetDeviceCommandContext()->SetScissors(0, nullptr); g_Renderer.GetSceneRenderer().SetViewCamera(m->SavedViewCamera); { PROFILE("unbind framebuffer"); g_Renderer.GetDeviceCommandContext()->SetFramebuffer( g_VideoMode.GetBackendDevice()->GetCurrentBackbuffer()); } const SViewPort vp = { 0, 0, g_Renderer.GetWidth(), g_Renderer.GetHeight() }; g_Renderer.SetViewport(vp); } void ShadowMap::BindTo(Renderer::Backend::GL::CShaderProgram* shader) const { if (!shader->GetTextureBinding(str_shadowTex).Active() || !m->Texture) return; shader->BindTexture(str_shadowTex, m->Texture.get()); shader->Uniform(str_shadowScale, m->Width, m->Height, 1.0f / m->Width, 1.0f / m->Height); const CVector3D cameraForward = g_Renderer.GetSceneRenderer().GetCullCamera().GetOrientation().GetIn(); shader->Uniform(str_cameraForward, cameraForward.X, cameraForward.Y, cameraForward.Z, cameraForward.Dot(g_Renderer.GetSceneRenderer().GetCullCamera().GetOrientation().GetTranslation())); if (GetCascadeCount() == 1) { shader->Uniform(str_shadowTransform, m->Cascades[0].TextureMatrix); shader->Uniform(str_shadowDistance, m->Cascades[0].Distance); } else { std::vector shadowDistances; std::vector shadowTransforms; for (const ShadowMapInternals::Cascade& cascade : m->Cascades) { shadowDistances.emplace_back(cascade.Distance); shadowTransforms.emplace_back(cascade.TextureMatrix); } - shader->Uniform(str_shadowTransforms_0, GetCascadeCount(), shadowTransforms.data()); shader->Uniform(str_shadowTransforms, GetCascadeCount(), shadowTransforms.data()); - shader->Uniform(str_shadowDistances_0, GetCascadeCount(), shadowDistances.data()); shader->Uniform(str_shadowDistances, GetCascadeCount(), shadowDistances.data()); } } // Depth texture bits int ShadowMap::GetDepthTextureBits() const { return m->DepthTextureBits; } void ShadowMap::SetDepthTextureBits(int bits) { if (bits != m->DepthTextureBits) { m->Texture.reset(); m->Width = m->Height = 0; m->DepthTextureBits = bits; } } void ShadowMap::RenderDebugBounds() { // Render various shadow bounds: // Yellow = bounds of objects in view frustum that receive shadows // Red = culling frustum used to find potential shadow casters // Blue = frustum used for rendering the shadow map const CMatrix3D transform = g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection() * m->InvLightTransform; g_Renderer.GetDebugRenderer().DrawBoundingBox( m->ShadowReceiverBound, CColor(1.0f, 1.0f, 0.0f, 1.0f), transform, true); for (int cascade = 0; cascade < GetCascadeCount(); ++cascade) { g_Renderer.GetDebugRenderer().DrawBoundingBox( m->Cascades[cascade].ShadowRenderBound, CColor(0.0f, 0.0f, 1.0f, 0.10f), transform); g_Renderer.GetDebugRenderer().DrawBoundingBox( m->Cascades[cascade].ShadowRenderBound, CColor(0.0f, 0.0f, 1.0f, 0.5f), transform, true); const CFrustum frustum = GetShadowCasterCullFrustum(cascade); // We don't have a function to create a brush directly from a frustum, so use // the ugly approach of creating a large cube and then intersecting with the frustum const CBoundingBoxAligned dummy(CVector3D(-1e4, -1e4, -1e4), CVector3D(1e4, 1e4, 1e4)); CBrush brush(dummy); CBrush frustumBrush; brush.Intersect(frustum, frustumBrush); g_Renderer.GetDebugRenderer().DrawBrush(frustumBrush, CColor(1.0f, 0.0f, 0.0f, 0.1f)); g_Renderer.GetDebugRenderer().DrawBrush(frustumBrush, CColor(1.0f, 0.0f, 0.0f, 0.1f), true); } ogl_WarnIfError(); } void ShadowMap::RenderDebugTexture( Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext) { if (!m->Texture) return; #if !CONFIG2_GLES deviceCommandContext->BindTexture(0, GL_TEXTURE_2D, m->Texture->GetHandle()); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); #endif CShaderTechniquePtr texTech = g_Renderer.GetShaderManager().LoadEffect(str_canvas2d); deviceCommandContext->SetGraphicsPipelineState( texTech->GetGraphicsPipelineStateDesc()); deviceCommandContext->BeginPass(); Renderer::Backend::GL::CShaderProgram* texShader = texTech->GetShader(); texShader->Uniform(str_transform, GetDefaultGuiMatrix()); texShader->BindTexture(str_tex, m->Texture.get()); texShader->Uniform(str_colorAdd, CColor(0.0f, 0.0f, 0.0f, 1.0f)); texShader->Uniform(str_colorMul, CColor(1.0f, 1.0f, 1.0f, 0.0f)); texShader->Uniform(str_grayscaleFactor, 0.0f); float s = 256.f; float boxVerts[] = { 0,0, 0,s, s,0, s,0, 0,s, s,s }; float boxUV[] = { 0,0, 0,1, 1,0, 1,0, 0,1, 1,1 }; deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::POSITION, Renderer::Backend::Format::R32G32_SFLOAT, 0, 0, 0); deviceCommandContext->SetVertexAttributeFormat( Renderer::Backend::VertexAttributeStream::UV0, Renderer::Backend::Format::R32G32_SFLOAT, 0, 0, 1); deviceCommandContext->SetVertexBufferData(0, boxVerts); deviceCommandContext->SetVertexBufferData(1, boxUV); deviceCommandContext->Draw(0, 6); deviceCommandContext->EndPass(); #if !CONFIG2_GLES deviceCommandContext->BindTexture(0, GL_TEXTURE_2D, m->Texture->GetHandle()); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_R_TO_TEXTURE); #endif ogl_WarnIfError(); } int ShadowMap::GetCascadeCount() const { #if CONFIG2_GLES return 1; #else return m->ShadowsCoverMap ? 1 : m->CascadeCount; #endif } Index: ps/trunk/source/renderer/backend/gl/ShaderProgram.cpp =================================================================== --- ps/trunk/source/renderer/backend/gl/ShaderProgram.cpp (revision 26815) +++ ps/trunk/source/renderer/backend/gl/ShaderProgram.cpp (revision 26816) @@ -1,1317 +1,1329 @@ /* Copyright (C) 2022 Wildfire Games. * This file is part of 0 A.D. * * 0 A.D. is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * 0 A.D. is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with 0 A.D. If not, see . */ #include "precompiled.h" #include "ShaderProgram.h" #include "graphics/Color.h" #include "graphics/PreprocessorWrapper.h" #include "graphics/ShaderManager.h" #include "graphics/TextureManager.h" #include "maths/Matrix3D.h" #include "maths/Vector3D.h" #include "ps/CLogger.h" #include "ps/Filesystem.h" #include "ps/Profile.h" #include "ps/XML/Xeromyces.h" #include "renderer/backend/gl/Device.h" #include "renderer/backend/gl/DeviceCommandContext.h" #define USE_SHADER_XML_VALIDATION 1 #if USE_SHADER_XML_VALIDATION #include "ps/XML/RelaxNG.h" #include "ps/XML/XMLWriter.h" #endif #include namespace Renderer { namespace Backend { namespace GL { namespace { int GetStreamMask(const VertexAttributeStream stream) { return 1 << static_cast(stream); } GLint GLSizeFromFormat(const Format format) { GLint size = 1; if (format == Renderer::Backend::Format::R32_SFLOAT || format == Renderer::Backend::Format::R16_SINT) size = 1; else if ( format == Renderer::Backend::Format::R8G8_UNORM || format == Renderer::Backend::Format::R8G8_UINT || format == Renderer::Backend::Format::R16G16_SINT || format == Renderer::Backend::Format::R32G32_SFLOAT) size = 2; else if (format == Renderer::Backend::Format::R32G32B32_SFLOAT) size = 3; else if ( format == Renderer::Backend::Format::R32G32B32A32_SFLOAT || format == Renderer::Backend::Format::R8G8B8A8_UNORM || format == Renderer::Backend::Format::R8G8B8A8_UINT) size = 4; else debug_warn("Unsupported format."); return size; } GLenum GLTypeFromFormat(const Format format) { GLenum type = GL_FLOAT; if (format == Renderer::Backend::Format::R32_SFLOAT || format == Renderer::Backend::Format::R32G32_SFLOAT || format == Renderer::Backend::Format::R32G32B32_SFLOAT || format == Renderer::Backend::Format::R32G32B32A32_SFLOAT) type = GL_FLOAT; else if ( format == Renderer::Backend::Format::R16_SINT || format == Renderer::Backend::Format::R16G16_SINT) type = GL_SHORT; else if ( format == Renderer::Backend::Format::R8G8_UNORM || format == Renderer::Backend::Format::R8G8_UINT || format == Renderer::Backend::Format::R8G8B8A8_UNORM || format == Renderer::Backend::Format::R8G8B8A8_UINT) type = GL_UNSIGNED_BYTE; else debug_warn("Unsupported format."); return type; } GLboolean NormalizedFromFormat(const Format format) { switch (format) { case Format::R8G8_UNORM: FALLTHROUGH; case Format::R8G8B8_UNORM: FALLTHROUGH; case Format::R8G8B8A8_UNORM: FALLTHROUGH; case Format::R16_UNORM: FALLTHROUGH; case Format::R16G16_UNORM: return GL_TRUE; default: break; } return GL_FALSE; } int GetAttributeLocationFromStream( CDevice* device, const VertexAttributeStream stream) { // Old mapping makes sense only if we have an old/low-end hardware. Else we // need to use sequential numbering to fix #3054. We use presence of // compute shaders as a check that the hardware has universal CUs. if (device->GetCapabilities().computeShaders) { return static_cast(stream); } else { // Map known semantics onto the attribute locations documented by NVIDIA: // https://download.nvidia.com/developer/Papers/2005/OpenGL_2.0/NVIDIA_OpenGL_2.0_Support.pdf // https://developer.download.nvidia.com/opengl/glsl/glsl_release_notes.pdf switch (stream) { case VertexAttributeStream::POSITION: return 0; case VertexAttributeStream::NORMAL: return 2; case VertexAttributeStream::COLOR: return 3; case VertexAttributeStream::UV0: return 8; case VertexAttributeStream::UV1: return 9; case VertexAttributeStream::UV2: return 10; case VertexAttributeStream::UV3: return 11; case VertexAttributeStream::UV4: return 12; case VertexAttributeStream::UV5: return 13; case VertexAttributeStream::UV6: return 14; case VertexAttributeStream::UV7: return 15; } } debug_warn("Invalid attribute semantics"); return 0; } bool PreprocessShaderFile( bool arb, const CShaderDefines& defines, const VfsPath& path, CStr& source, std::vector& fileDependencies) { CVFSFile file; if (file.Load(g_VFS, path) != PSRETURN_OK) { LOGERROR("Failed to load shader file: '%s'", path.string8()); return false; } CPreprocessorWrapper preprocessor( [arb, &fileDependencies](const CStr& includePath, CStr& out) -> bool { const VfsPath includeFilePath( (arb ? L"shaders/arb/" : L"shaders/glsl/") + wstring_from_utf8(includePath)); // Add dependencies anyway to reload the shader when the file is // appeared. fileDependencies.push_back(includeFilePath); CVFSFile includeFile; if (includeFile.Load(g_VFS, includeFilePath) != PSRETURN_OK) { LOGERROR("Failed to load shader include file: '%s'", includeFilePath.string8()); return false; } out = includeFile.GetAsString(); return true; }); preprocessor.AddDefines(defines); #if CONFIG2_GLES if (!arb) { // GLES defines the macro "GL_ES" in its GLSL preprocessor, // but since we run our own preprocessor first, we need to explicitly // define it here preprocessor.AddDefine("GL_ES", "1"); } #endif source = preprocessor.Preprocess(file.GetAsString()); return true; } } // anonymous namespace #if !CONFIG2_GLES class CShaderProgramARB final : public CShaderProgram { public: CShaderProgramARB( CDevice* device, const VfsPath& vertexFilePath, const VfsPath& fragmentFilePath, const CShaderDefines& defines, const std::map& vertexIndexes, const std::map& fragmentIndexes, int streamflags) : CShaderProgram(streamflags), m_Device(device), m_VertexIndexes(vertexIndexes), m_FragmentIndexes(fragmentIndexes) { glGenProgramsARB(1, &m_VertexProgram); glGenProgramsARB(1, &m_FragmentProgram); std::vector newFileDependencies = {vertexFilePath, fragmentFilePath}; CStr vertexCode; if (!PreprocessShaderFile(true, defines, vertexFilePath, vertexCode, newFileDependencies)) return; CStr fragmentCode; if (!PreprocessShaderFile(true, defines, fragmentFilePath, fragmentCode, newFileDependencies)) return; m_FileDependencies = std::move(newFileDependencies); // TODO: replace by scoped bind. m_Device->GetActiveCommandContext()->SetGraphicsPipelineState( MakeDefaultGraphicsPipelineStateDesc()); if (!Compile(GL_VERTEX_PROGRAM_ARB, "vertex", m_VertexProgram, vertexFilePath, vertexCode)) return; if (!Compile(GL_FRAGMENT_PROGRAM_ARB, "fragment", m_FragmentProgram, fragmentFilePath, fragmentCode)) return; } ~CShaderProgramARB() override { glDeleteProgramsARB(1, &m_VertexProgram); glDeleteProgramsARB(1, &m_FragmentProgram); } bool Compile(GLuint target, const char* targetName, GLuint program, const VfsPath& file, const CStr& code) { ogl_WarnIfError(); glBindProgramARB(target, program); ogl_WarnIfError(); glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)code.length(), code.c_str()); if (ogl_SquelchError(GL_INVALID_OPERATION)) { GLint errPos = 0; glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos); int errLine = std::count(code.begin(), code.begin() + std::min((int)code.length(), errPos + 1), '\n') + 1; char* errStr = (char*)glGetString(GL_PROGRAM_ERROR_STRING_ARB); LOGERROR("Failed to compile %s program '%s' (line %d):\n%s", targetName, file.string8(), errLine, errStr); return false; } glBindProgramARB(target, 0); ogl_WarnIfError(); return true; } void Bind(CShaderProgram* previousShaderProgram) override { CShaderProgramARB* previousShaderProgramARB = nullptr; if (previousShaderProgram) previousShaderProgramARB = static_cast(previousShaderProgramARB); if (previousShaderProgramARB) previousShaderProgramARB->UnbindClientStates(); if (!previousShaderProgramARB || previousShaderProgramARB->m_VertexProgram != m_VertexProgram || previousShaderProgramARB->m_FragmentProgram != m_FragmentProgram) { glBindProgramARB(GL_VERTEX_PROGRAM_ARB, m_VertexProgram); glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, m_FragmentProgram); } BindClientStates(); } void Unbind() override { glBindProgramARB(GL_VERTEX_PROGRAM_ARB, 0); glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0); UnbindClientStates(); // TODO: should unbind textures, probably } int GetUniformVertexIndex(CStrIntern id) { std::map::iterator it = m_VertexIndexes.find(id); if (it == m_VertexIndexes.end()) return -1; return it->second; } frag_index_pair_t GetUniformFragmentIndex(CStrIntern id) { std::map::iterator it = m_FragmentIndexes.find(id); if (it == m_FragmentIndexes.end()) return std::make_pair(-1, 0); return it->second; } Binding GetTextureBinding(texture_id_t id) override { frag_index_pair_t fPair = GetUniformFragmentIndex(id); int index = fPair.first; if (index == -1) return Binding(); else return Binding((int)fPair.second, index); } void BindTexture(texture_id_t id, GLuint tex) override { frag_index_pair_t fPair = GetUniformFragmentIndex(id); int index = fPair.first; if (index != -1) { m_Device->GetActiveCommandContext()->BindTexture(index, fPair.second, tex); } } void BindTexture(Binding id, GLuint tex) override { int index = id.second; if (index != -1) { m_Device->GetActiveCommandContext()->BindTexture(index, id.first, tex); } } Binding GetUniformBinding(uniform_id_t id) override { return Binding(GetUniformVertexIndex(id), GetUniformFragmentIndex(id).first); } void Uniform(Binding id, float v0, float v1, float v2, float v3) override { if (id.first != -1) glProgramLocalParameter4fARB(GL_VERTEX_PROGRAM_ARB, (GLuint)id.first, v0, v1, v2, v3); if (id.second != -1) glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, (GLuint)id.second, v0, v1, v2, v3); } void Uniform(Binding id, const CMatrix3D& v) override { if (id.first != -1) { glProgramLocalParameter4fARB(GL_VERTEX_PROGRAM_ARB, (GLuint)id.first+0, v._11, v._12, v._13, v._14); glProgramLocalParameter4fARB(GL_VERTEX_PROGRAM_ARB, (GLuint)id.first+1, v._21, v._22, v._23, v._24); glProgramLocalParameter4fARB(GL_VERTEX_PROGRAM_ARB, (GLuint)id.first+2, v._31, v._32, v._33, v._34); glProgramLocalParameter4fARB(GL_VERTEX_PROGRAM_ARB, (GLuint)id.first+3, v._41, v._42, v._43, v._44); } if (id.second != -1) { glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, (GLuint)id.second+0, v._11, v._12, v._13, v._14); glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, (GLuint)id.second+1, v._21, v._22, v._23, v._24); glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, (GLuint)id.second+2, v._31, v._32, v._33, v._34); glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, (GLuint)id.second+3, v._41, v._42, v._43, v._44); } } void Uniform(Binding id, size_t count, const CMatrix3D* v) override { ENSURE(count == 1); Uniform(id, v[0]); } void Uniform(Binding id, size_t count, const float* v) override { ENSURE(count == 4); Uniform(id, v[0], v[1], v[2], v[3]); } std::vector GetFileDependencies() const override { return m_FileDependencies; } private: CDevice* m_Device = nullptr; std::vector m_FileDependencies; GLuint m_VertexProgram; GLuint m_FragmentProgram; std::map m_VertexIndexes; // pair contains std::map m_FragmentIndexes; }; #endif // !CONFIG2_GLES class CShaderProgramGLSL final : public CShaderProgram { public: CShaderProgramGLSL( CDevice* device, const CStr& name, const VfsPath& vertexFilePath, const VfsPath& fragmentFilePath, const CShaderDefines& defines, const std::map& vertexAttribs, int streamflags) : CShaderProgram(streamflags), m_Device(device), m_Name(name), m_VertexAttribs(vertexAttribs) { for (std::map::iterator it = m_VertexAttribs.begin(); it != m_VertexAttribs.end(); ++it) m_ActiveVertexAttributes.emplace_back(it->second); std::sort(m_ActiveVertexAttributes.begin(), m_ActiveVertexAttributes.end()); m_Program = 0; m_VertexShader = glCreateShader(GL_VERTEX_SHADER); m_FragmentShader = glCreateShader(GL_FRAGMENT_SHADER); m_FileDependencies = {vertexFilePath, fragmentFilePath}; #if !CONFIG2_GLES if (m_Device->GetCapabilities().debugLabels) { glObjectLabel(GL_SHADER, m_VertexShader, -1, vertexFilePath.string8().c_str()); glObjectLabel(GL_SHADER, m_FragmentShader, -1, fragmentFilePath.string8().c_str()); } #endif std::vector newFileDependencies = {vertexFilePath, fragmentFilePath}; CStr vertexCode; if (!PreprocessShaderFile(false, defines, vertexFilePath, vertexCode, newFileDependencies)) return; CStr fragmentCode; if (!PreprocessShaderFile(false, defines, fragmentFilePath, fragmentCode, newFileDependencies)) return; m_FileDependencies = std::move(newFileDependencies); if (vertexCode.empty()) { LOGERROR("Failed to preprocess vertex shader: '%s'", vertexFilePath.string8()); return; } if (fragmentCode.empty()) { LOGERROR("Failed to preprocess fragment shader: '%s'", fragmentFilePath.string8()); return; } #if CONFIG2_GLES // Ugly hack to replace desktop GLSL 1.10/1.20 with GLSL ES 1.00, // and also to set default float precision for fragment shaders vertexCode.Replace("#version 110\n", "#version 100\n"); vertexCode.Replace("#version 110\r\n", "#version 100\n"); vertexCode.Replace("#version 120\n", "#version 100\n"); vertexCode.Replace("#version 120\r\n", "#version 100\n"); fragmentCode.Replace("#version 110\n", "#version 100\nprecision mediump float;\n"); fragmentCode.Replace("#version 110\r\n", "#version 100\nprecision mediump float;\n"); fragmentCode.Replace("#version 120\n", "#version 100\nprecision mediump float;\n"); fragmentCode.Replace("#version 120\r\n", "#version 100\nprecision mediump float;\n"); #endif // TODO: replace by scoped bind. m_Device->GetActiveCommandContext()->SetGraphicsPipelineState( MakeDefaultGraphicsPipelineStateDesc()); if (!Compile(m_VertexShader, vertexFilePath, vertexCode)) return; if (!Compile(m_FragmentShader, fragmentFilePath, fragmentCode)) return; if (!Link(vertexFilePath, fragmentFilePath)) return; } ~CShaderProgramGLSL() override { if (m_Program) glDeleteProgram(m_Program); glDeleteShader(m_VertexShader); glDeleteShader(m_FragmentShader); } bool Compile(GLuint shader, const VfsPath& file, const CStr& code) { const char* code_string = code.c_str(); GLint code_length = code.length(); glShaderSource(shader, 1, &code_string, &code_length); ogl_WarnIfError(); glCompileShader(shader); GLint ok = 0; glGetShaderiv(shader, GL_COMPILE_STATUS, &ok); GLint length = 0; glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length); // Apparently sometimes GL_INFO_LOG_LENGTH is incorrectly reported as 0 // (http://code.google.com/p/android/issues/detail?id=9953) if (!ok && length == 0) length = 4096; if (length > 1) { char* infolog = new char[length]; glGetShaderInfoLog(shader, length, NULL, infolog); if (ok) LOGMESSAGE("Info when compiling shader '%s':\n%s", file.string8(), infolog); else LOGERROR("Failed to compile shader '%s':\n%s", file.string8(), infolog); delete[] infolog; } ogl_WarnIfError(); return ok; } bool Link(const VfsPath& vertexFilePath, const VfsPath& fragmentFilePath) { ENSURE(!m_Program); m_Program = glCreateProgram(); #if !CONFIG2_GLES if (m_Device->GetCapabilities().debugLabels) { glObjectLabel(GL_PROGRAM, m_Program, -1, m_Name.c_str()); } #endif glAttachShader(m_Program, m_VertexShader); ogl_WarnIfError(); glAttachShader(m_Program, m_FragmentShader); ogl_WarnIfError(); // Set up the attribute bindings explicitly, since apparently drivers // don't always pick the most efficient bindings automatically, // and also this lets us hardcode indexes into VertexPointer etc for (std::map::iterator it = m_VertexAttribs.begin(); it != m_VertexAttribs.end(); ++it) glBindAttribLocation(m_Program, it->second, it->first.c_str()); glLinkProgram(m_Program); GLint ok = 0; glGetProgramiv(m_Program, GL_LINK_STATUS, &ok); GLint length = 0; glGetProgramiv(m_Program, GL_INFO_LOG_LENGTH, &length); if (!ok && length == 0) length = 4096; if (length > 1) { char* infolog = new char[length]; glGetProgramInfoLog(m_Program, length, NULL, infolog); if (ok) LOGMESSAGE("Info when linking program '%s'+'%s':\n%s", vertexFilePath.string8(), fragmentFilePath.string8(), infolog); else LOGERROR("Failed to link program '%s'+'%s':\n%s", vertexFilePath.string8(), fragmentFilePath.string8(), infolog); delete[] infolog; } ogl_WarnIfError(); if (!ok) return false; m_Uniforms.clear(); m_Samplers.clear(); Bind(nullptr); ogl_WarnIfError(); GLint numUniforms = 0; glGetProgramiv(m_Program, GL_ACTIVE_UNIFORMS, &numUniforms); ogl_WarnIfError(); for (GLint i = 0; i < numUniforms; ++i) { + // TODO: use GL_ACTIVE_UNIFORM_MAX_LENGTH for the size. char name[256] = {0}; GLsizei nameLength = 0; GLint size = 0; GLenum type = 0; glGetActiveUniform(m_Program, i, ARRAY_SIZE(name), &nameLength, &size, &type, name); ogl_WarnIfError(); - GLint loc = glGetUniformLocation(m_Program, name); + const GLint location = glGetUniformLocation(m_Program, name); + + // OpenGL specification is a bit vague about a name returned by glGetActiveUniform. + // NVIDIA drivers return uniform name with "[0]", Intel Windows drivers without; + while (nameLength > 3 && + name[nameLength - 3] == '[' && + name[nameLength - 2] == '0' && + name[nameLength - 1] == ']') + { + nameLength -= 3; + } + name[nameLength] = 0; CStrIntern nameIntern(name); - m_Uniforms[nameIntern] = std::make_pair(loc, type); + m_Uniforms[nameIntern] = std::make_pair(location, type); // Assign sampler uniforms to sequential texture units if (type == GL_SAMPLER_2D || type == GL_SAMPLER_CUBE #if !CONFIG2_GLES || type == GL_SAMPLER_2D_SHADOW #endif ) { - int unit = (int)m_Samplers.size(); + const int unit = static_cast(m_Samplers.size()); m_Samplers[nameIntern].first = (type == GL_SAMPLER_CUBE ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D); m_Samplers[nameIntern].second = unit; - glUniform1i(loc, unit); // link uniform to unit + glUniform1i(location, unit); // link uniform to unit ogl_WarnIfError(); } } // TODO: verify that we're not using more samplers than is supported Unbind(); ogl_WarnIfError(); return true; } void Bind(CShaderProgram* previousShaderProgram) override { CShaderProgramGLSL* previousShaderProgramGLSL = nullptr; if (previousShaderProgram) previousShaderProgramGLSL = static_cast(previousShaderProgram); ENSURE(this != previousShaderProgramGLSL); glUseProgram(m_Program); if (previousShaderProgramGLSL) { std::vector::iterator itPrevious = previousShaderProgramGLSL->m_ActiveVertexAttributes.begin(); std::vector::iterator itNext = m_ActiveVertexAttributes.begin(); while ( itPrevious != previousShaderProgramGLSL->m_ActiveVertexAttributes.end() || itNext != m_ActiveVertexAttributes.end()) { if (itPrevious != previousShaderProgramGLSL->m_ActiveVertexAttributes.end() && itNext != m_ActiveVertexAttributes.end()) { if (*itPrevious == *itNext) { ++itPrevious; ++itNext; } else if (*itPrevious < *itNext) { glDisableVertexAttribArray(*itPrevious); ++itPrevious; } else if (*itPrevious > *itNext) { glEnableVertexAttribArray(*itNext); ++itNext; } } else if (itPrevious != previousShaderProgramGLSL->m_ActiveVertexAttributes.end()) { glDisableVertexAttribArray(*itPrevious); ++itPrevious; } else if (itNext != m_ActiveVertexAttributes.end()) { glEnableVertexAttribArray(*itNext); ++itNext; } } } else { for (const int index : m_ActiveVertexAttributes) glEnableVertexAttribArray(index); } m_ValidStreams = 0; } void Unbind() override { glUseProgram(0); for (const int index : m_ActiveVertexAttributes) glDisableVertexAttribArray(index); // TODO: should unbind textures, probably } Binding GetTextureBinding(texture_id_t id) override { std::map>::iterator it = m_Samplers.find(CStrIntern(id)); if (it == m_Samplers.end()) return Binding(); else return Binding((int)it->second.first, it->second.second); } void BindTexture(texture_id_t id, GLuint tex) override { std::map>::iterator it = m_Samplers.find(CStrIntern(id)); if (it == m_Samplers.end()) return; m_Device->GetActiveCommandContext()->BindTexture(it->second.second, it->second.first, tex); } void BindTexture(Binding id, GLuint tex) override { if (id.second == -1) return; m_Device->GetActiveCommandContext()->BindTexture(id.second, id.first, tex); } Binding GetUniformBinding(uniform_id_t id) override { std::map>::iterator it = m_Uniforms.find(id); if (it == m_Uniforms.end()) return Binding(); else return Binding(it->second.first, (int)it->second.second); } void Uniform(Binding id, float v0, float v1, float v2, float v3) override { if (id.first != -1) { if (id.second == GL_FLOAT) glUniform1f(id.first, v0); else if (id.second == GL_FLOAT_VEC2) glUniform2f(id.first, v0, v1); else if (id.second == GL_FLOAT_VEC3) glUniform3f(id.first, v0, v1, v2); else if (id.second == GL_FLOAT_VEC4) glUniform4f(id.first, v0, v1, v2, v3); else LOGERROR("CShaderProgramGLSL::Uniform(): Invalid uniform type (expected float, vec2, vec3, vec4)"); } } void Uniform(Binding id, const CMatrix3D& v) override { if (id.first != -1) { if (id.second == GL_FLOAT_MAT4) glUniformMatrix4fv(id.first, 1, GL_FALSE, &v._11); else LOGERROR("CShaderProgramGLSL::Uniform(): Invalid uniform type (expected mat4)"); } } void Uniform(Binding id, size_t count, const CMatrix3D* v) override { if (id.first != -1) { if (id.second == GL_FLOAT_MAT4) glUniformMatrix4fv(id.first, count, GL_FALSE, &v->_11); else LOGERROR("CShaderProgramGLSL::Uniform(): Invalid uniform type (expected mat4)"); } } void Uniform(Binding id, size_t count, const float* v) override { if (id.first != -1) { if (id.second == GL_FLOAT) glUniform1fv(id.first, count, v); else LOGERROR("CShaderProgramGLSL::Uniform(): Invalid uniform type (expected float)"); } } void VertexAttribPointer( const VertexAttributeStream stream, const Format format, const uint32_t offset, const uint32_t stride, const void* data) override { const int attributeLocation = GetAttributeLocationFromStream(m_Device, stream); std::vector::const_iterator it = std::lower_bound(m_ActiveVertexAttributes.begin(), m_ActiveVertexAttributes.end(), attributeLocation); if (it == m_ActiveVertexAttributes.end() || *it != attributeLocation) return; const GLint size = GLSizeFromFormat(format); const GLenum type = GLTypeFromFormat(format); const GLboolean normalized = NormalizedFromFormat(format); glVertexAttribPointer( attributeLocation, size, type, normalized, stride, static_cast(data) + offset); m_ValidStreams |= GetStreamMask(stream); } std::vector GetFileDependencies() const override { return m_FileDependencies; } private: CDevice* m_Device = nullptr; CStr m_Name; std::vector m_FileDependencies; std::map m_VertexAttribs; // Sorted list of active vertex attributes. std::vector m_ActiveVertexAttributes; GLuint m_Program; GLuint m_VertexShader, m_FragmentShader; std::map> m_Uniforms; std::map> m_Samplers; // texture target & unit chosen for each uniform sampler }; CShaderProgram::CShaderProgram(int streamflags) : m_StreamFlags(streamflags), m_ValidStreams(0) { } CShaderProgram::~CShaderProgram() = default; // static std::unique_ptr CShaderProgram::Create(CDevice* device, const CStr& name, const CShaderDefines& baseDefines) { PROFILE2("loading shader"); PROFILE2_ATTR("name: %s", name.c_str()); VfsPath xmlFilename = L"shaders/" + wstring_from_utf8(name) + L".xml"; CXeromyces XeroFile; PSRETURN ret = XeroFile.Load(g_VFS, xmlFilename); if (ret != PSRETURN_OK) return nullptr; #if USE_SHADER_XML_VALIDATION { // Serialize the XMB data and pass it to the validator XMLWriter_File shaderFile; shaderFile.SetPrettyPrint(false); shaderFile.XMB(XeroFile); bool ok = CXeromyces::ValidateEncoded("shader", name, shaderFile.GetOutput()); if (!ok) return nullptr; } #endif // Define all the elements and attributes used in the XML file #define EL(x) int el_##x = XeroFile.GetElementID(#x) #define AT(x) int at_##x = XeroFile.GetAttributeID(#x) EL(define); EL(fragment); EL(stream); EL(uniform); EL(vertex); AT(attribute); AT(file); AT(if); AT(loc); AT(name); AT(type); AT(value); #undef AT #undef EL CPreprocessorWrapper preprocessor; preprocessor.AddDefines(baseDefines); XMBElement root = XeroFile.GetRoot(); const bool isGLSL = root.GetAttributes().GetNamedItem(at_type) == "glsl"; VfsPath vertexFile; VfsPath fragmentFile; CShaderDefines defines = baseDefines; std::map vertexUniforms; std::map fragmentUniforms; std::map vertexAttribs; int streamFlags = 0; XERO_ITER_EL(root, child) { if (child.GetNodeName() == el_define) { defines.Add(CStrIntern(child.GetAttributes().GetNamedItem(at_name)), CStrIntern(child.GetAttributes().GetNamedItem(at_value))); } else if (child.GetNodeName() == el_vertex) { vertexFile = L"shaders/" + child.GetAttributes().GetNamedItem(at_file).FromUTF8(); XERO_ITER_EL(child, param) { XMBAttributeList attributes = param.GetAttributes(); CStr cond = attributes.GetNamedItem(at_if); if (!cond.empty() && !preprocessor.TestConditional(cond)) continue; if (param.GetNodeName() == el_uniform) { vertexUniforms[CStrIntern(attributes.GetNamedItem(at_name))] = attributes.GetNamedItem(at_loc).ToInt(); } else if (param.GetNodeName() == el_stream) { const CStr streamName = attributes.GetNamedItem(at_name); const CStr attributeName = attributes.GetNamedItem(at_attribute); if (attributeName.empty() && isGLSL) LOGERROR("Empty attribute name in vertex shader description '%s'", vertexFile.string8().c_str()); VertexAttributeStream stream = VertexAttributeStream::UV7; if (streamName == "pos") stream = VertexAttributeStream::POSITION; else if (streamName == "normal") stream = VertexAttributeStream::NORMAL; else if (streamName == "color") stream = VertexAttributeStream::COLOR; else if (streamName == "uv0") stream = VertexAttributeStream::UV0; else if (streamName == "uv1") stream = VertexAttributeStream::UV1; else if (streamName == "uv2") stream = VertexAttributeStream::UV2; else if (streamName == "uv3") stream = VertexAttributeStream::UV3; else if (streamName == "uv4") stream = VertexAttributeStream::UV4; else if (streamName == "uv5") stream = VertexAttributeStream::UV5; else if (streamName == "uv6") stream = VertexAttributeStream::UV6; else if (streamName == "uv7") stream = VertexAttributeStream::UV7; else LOGERROR("Unknown stream '%s' in vertex shader description '%s'", streamName.c_str(), vertexFile.string8().c_str()); if (isGLSL) { const int attributeLocation = GetAttributeLocationFromStream(device, stream); vertexAttribs[CStrIntern(attributeName)] = attributeLocation; } streamFlags |= GetStreamMask(stream); } } } else if (child.GetNodeName() == el_fragment) { fragmentFile = L"shaders/" + child.GetAttributes().GetNamedItem(at_file).FromUTF8(); XERO_ITER_EL(child, param) { XMBAttributeList attributes = param.GetAttributes(); CStr cond = attributes.GetNamedItem(at_if); if (!cond.empty() && !preprocessor.TestConditional(cond)) continue; if (param.GetNodeName() == el_uniform) { // A somewhat incomplete listing, missing "shadow" and "rect" versions // which are interpreted as 2D (NB: our shadowmaps may change // type based on user config). GLenum type = GL_TEXTURE_2D; const CStr t = attributes.GetNamedItem(at_type); if (t == "sampler1D") #if CONFIG2_GLES debug_warn(L"sampler1D not implemented on GLES"); #else type = GL_TEXTURE_1D; #endif else if (t == "sampler2D") type = GL_TEXTURE_2D; else if (t == "sampler3D") #if CONFIG2_GLES debug_warn(L"sampler3D not implemented on GLES"); #else type = GL_TEXTURE_3D; #endif else if (t == "samplerCube") type = GL_TEXTURE_CUBE_MAP; fragmentUniforms[CStrIntern(attributes.GetNamedItem(at_name))] = std::make_pair(attributes.GetNamedItem(at_loc).ToInt(), type); } } } } if (isGLSL) return CShaderProgram::ConstructGLSL(device, name, vertexFile, fragmentFile, defines, vertexAttribs, streamFlags); else return CShaderProgram::ConstructARB(device, vertexFile, fragmentFile, defines, vertexUniforms, fragmentUniforms, streamFlags); } #if CONFIG2_GLES // static std::unique_ptr CShaderProgram::ConstructARB(CDevice* UNUSED(device), const VfsPath& vertexFile, const VfsPath& fragmentFile, const CShaderDefines& UNUSED(defines), const std::map& UNUSED(vertexIndexes), const std::map& UNUSED(fragmentIndexes), int UNUSED(streamflags)) { LOGERROR("CShaderProgram::ConstructARB: '%s'+'%s': ARB shaders not supported on this device", vertexFile.string8(), fragmentFile.string8()); return nullptr; } #else // static std::unique_ptr CShaderProgram::ConstructARB( CDevice* device, const VfsPath& vertexFile, const VfsPath& fragmentFile, const CShaderDefines& defines, const std::map& vertexIndexes, const std::map& fragmentIndexes, int streamflags) { return std::make_unique( device, vertexFile, fragmentFile, defines, vertexIndexes, fragmentIndexes, streamflags); } #endif // static std::unique_ptr CShaderProgram::ConstructGLSL( CDevice* device, const CStr& name, const VfsPath& vertexFile, const VfsPath& fragmentFile, const CShaderDefines& defines, const std::map& vertexAttribs, int streamflags) { return std::make_unique( device, name, vertexFile, fragmentFile, defines, vertexAttribs, streamflags); } void CShaderProgram::BindTexture(texture_id_t id, const Renderer::Backend::GL::CTexture* tex) { BindTexture(id, tex->GetHandle()); } void CShaderProgram::BindTexture(Binding id, const Renderer::Backend::GL::CTexture* tex) { BindTexture(id, tex->GetHandle()); } void CShaderProgram::Uniform(Binding id, int v) { Uniform(id, (float)v, (float)v, (float)v, (float)v); } void CShaderProgram::Uniform(Binding id, float v) { Uniform(id, v, v, v, v); } void CShaderProgram::Uniform(Binding id, float v0, float v1) { Uniform(id, v0, v1, 0.0f, 0.0f); } void CShaderProgram::Uniform(Binding id, const CVector3D& v) { Uniform(id, v.X, v.Y, v.Z, 0.0f); } void CShaderProgram::Uniform(Binding id, const CColor& v) { Uniform(id, v.r, v.g, v.b, v.a); } void CShaderProgram::Uniform(uniform_id_t id, int v) { Uniform(GetUniformBinding(id), (float)v, (float)v, (float)v, (float)v); } void CShaderProgram::Uniform(uniform_id_t id, float v) { Uniform(GetUniformBinding(id), v, v, v, v); } void CShaderProgram::Uniform(uniform_id_t id, float v0, float v1) { Uniform(GetUniformBinding(id), v0, v1, 0.0f, 0.0f); } void CShaderProgram::Uniform(uniform_id_t id, const CVector3D& v) { Uniform(GetUniformBinding(id), v.X, v.Y, v.Z, 0.0f); } void CShaderProgram::Uniform(uniform_id_t id, const CColor& v) { Uniform(GetUniformBinding(id), v.r, v.g, v.b, v.a); } void CShaderProgram::Uniform(uniform_id_t id, float v0, float v1, float v2, float v3) { Uniform(GetUniformBinding(id), v0, v1, v2, v3); } void CShaderProgram::Uniform(uniform_id_t id, const CMatrix3D& v) { Uniform(GetUniformBinding(id), v); } void CShaderProgram::Uniform(uniform_id_t id, size_t count, const CMatrix3D* v) { Uniform(GetUniformBinding(id), count, v); } void CShaderProgram::Uniform(uniform_id_t id, size_t count, const float* v) { Uniform(GetUniformBinding(id), count, v); } // These should all be overridden by CShaderProgramGLSL, and not used // if a non-GLSL shader was loaded instead: void CShaderProgram::VertexAttribPointer(attrib_id_t UNUSED(id), const Renderer::Backend::Format UNUSED(format), GLboolean UNUSED(normalized), GLsizei UNUSED(stride), const void* UNUSED(pointer)) { debug_warn("Shader type doesn't support VertexAttribPointer"); } #if CONFIG2_GLES // These should all be overridden by CShaderProgramGLSL // (GLES doesn't support any other types of shader program): void CShaderProgram::VertexPointer(const Renderer::Backend::Format UNUSED(format), GLsizei UNUSED(stride), const void* UNUSED(pointer)) { debug_warn("CShaderProgram::VertexPointer should be overridden"); } void CShaderProgram::NormalPointer(const Renderer::Backend::Format UNUSED(format), GLsizei UNUSED(stride), const void* UNUSED(pointer)) { debug_warn("CShaderProgram::NormalPointer should be overridden"); } void CShaderProgram::ColorPointer(const Renderer::Backend::Format UNUSED(format), GLsizei UNUSED(stride), const void* UNUSED(pointer)) { debug_warn("CShaderProgram::ColorPointer should be overridden"); } void CShaderProgram::TexCoordPointer(GLenum UNUSED(texture), const Renderer::Backend::Format UNUSED(format), GLsizei UNUSED(stride), const void* UNUSED(pointer)) { debug_warn("CShaderProgram::TexCoordPointer should be overridden"); } #else // These are overridden by CShaderProgramGLSL, but fixed-function and ARB shaders // both use the fixed-function vertex attribute pointers so we'll share their // definitions here: void CShaderProgram::VertexPointer(const Renderer::Backend::Format format, GLsizei stride, const void* pointer) { const GLint size = GLSizeFromFormat(format); ENSURE(2 <= size && size <= 4); const GLenum type = GLTypeFromFormat(format); glVertexPointer(size, type, stride, pointer); m_ValidStreams |= GetStreamMask(VertexAttributeStream::POSITION); } void CShaderProgram::NormalPointer(const Renderer::Backend::Format format, GLsizei stride, const void* pointer) { ENSURE(format == Renderer::Backend::Format::R32G32B32_SFLOAT); glNormalPointer(GL_FLOAT, stride, pointer); m_ValidStreams |= GetStreamMask(VertexAttributeStream::NORMAL); } void CShaderProgram::ColorPointer(const Renderer::Backend::Format format, GLsizei stride, const void* pointer) { const GLint size = GLSizeFromFormat(format); ENSURE(3 <= size && size <= 4); const GLenum type = GLTypeFromFormat(format); glColorPointer(size, type, stride, pointer); m_ValidStreams |= GetStreamMask(VertexAttributeStream::COLOR); } void CShaderProgram::TexCoordPointer(GLenum texture, const Renderer::Backend::Format format, GLsizei stride, const void* pointer) { glClientActiveTextureARB(texture); const GLint size = GLSizeFromFormat(format); ENSURE(1 <= size && size <= 4); const GLenum type = GLTypeFromFormat(format); glTexCoordPointer(size, type, stride, pointer); glClientActiveTextureARB(GL_TEXTURE0); m_ValidStreams |= GetStreamMask(VertexAttributeStream::UV0) << (texture - GL_TEXTURE0); } void CShaderProgram::BindClientStates() { ENSURE(m_StreamFlags == (m_StreamFlags & ( GetStreamMask(VertexAttributeStream::POSITION) | GetStreamMask(VertexAttributeStream::NORMAL) | GetStreamMask(VertexAttributeStream::COLOR) | GetStreamMask(VertexAttributeStream::UV0) | GetStreamMask(VertexAttributeStream::UV1)))); // Enable all the desired client states for non-GLSL rendering if (m_StreamFlags & GetStreamMask(VertexAttributeStream::POSITION)) glEnableClientState(GL_VERTEX_ARRAY); if (m_StreamFlags & GetStreamMask(VertexAttributeStream::NORMAL)) glEnableClientState(GL_NORMAL_ARRAY); if (m_StreamFlags & GetStreamMask(VertexAttributeStream::COLOR)) glEnableClientState(GL_COLOR_ARRAY); if (m_StreamFlags & GetStreamMask(VertexAttributeStream::UV0)) { glClientActiveTextureARB(GL_TEXTURE0); glEnableClientState(GL_TEXTURE_COORD_ARRAY); } if (m_StreamFlags & GetStreamMask(VertexAttributeStream::UV1)) { glClientActiveTextureARB(GL_TEXTURE1); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glClientActiveTextureARB(GL_TEXTURE0); } // Rendering code must subsequently call VertexPointer etc for all of the streams // that were activated in this function, else AssertPointersBound will complain // that some arrays were unspecified m_ValidStreams = 0; } void CShaderProgram::UnbindClientStates() { if (m_StreamFlags & GetStreamMask(VertexAttributeStream::POSITION)) glDisableClientState(GL_VERTEX_ARRAY); if (m_StreamFlags & GetStreamMask(VertexAttributeStream::NORMAL)) glDisableClientState(GL_NORMAL_ARRAY); if (m_StreamFlags & GetStreamMask(VertexAttributeStream::COLOR)) glDisableClientState(GL_COLOR_ARRAY); if (m_StreamFlags & GetStreamMask(VertexAttributeStream::UV0)) { glClientActiveTextureARB(GL_TEXTURE0); glDisableClientState(GL_TEXTURE_COORD_ARRAY); } if (m_StreamFlags & GetStreamMask(VertexAttributeStream::UV1)) { glClientActiveTextureARB(GL_TEXTURE1); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glClientActiveTextureARB(GL_TEXTURE0); } } #endif // !CONFIG2_GLES bool CShaderProgram::IsStreamActive(const VertexAttributeStream stream) const { return (m_StreamFlags & GetStreamMask(stream)) != 0; } void CShaderProgram::VertexAttribPointer( const VertexAttributeStream stream, const Format format, const uint32_t offset, const uint32_t stride, const void* data) { switch (stream) { case VertexAttributeStream::POSITION: VertexPointer(format, stride, static_cast(data) + offset); break; case VertexAttributeStream::NORMAL: NormalPointer(format, stride, static_cast(data) + offset); break; case VertexAttributeStream::COLOR: ColorPointer(format, stride, static_cast(data) + offset); break; case VertexAttributeStream::UV0: FALLTHROUGH; case VertexAttributeStream::UV1: FALLTHROUGH; case VertexAttributeStream::UV2: FALLTHROUGH; case VertexAttributeStream::UV3: FALLTHROUGH; case VertexAttributeStream::UV4: FALLTHROUGH; case VertexAttributeStream::UV5: FALLTHROUGH; case VertexAttributeStream::UV6: FALLTHROUGH; case VertexAttributeStream::UV7: { const int indexOffset = static_cast(stream) - static_cast(VertexAttributeStream::UV0); TexCoordPointer(GL_TEXTURE0 + indexOffset, format, stride, static_cast(data) + offset); break; } default: debug_warn("Unsupported stream"); }; } void CShaderProgram::AssertPointersBound() { ENSURE((m_StreamFlags & ~m_ValidStreams) == 0); } } // namespace GL } // namespace Backend } // namespace Renderer