Index: ps/trunk/source/renderer/DecalRData.cpp
===================================================================
--- ps/trunk/source/renderer/DecalRData.cpp	(revision 27181)
+++ ps/trunk/source/renderer/DecalRData.cpp	(revision 27182)
@@ -1,376 +1,375 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "DecalRData.h"
 
 #include "graphics/Decal.h"
 #include "graphics/Model.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "lib/allocators/DynamicArena.h"
 #include "lib/allocators/STLAllocators.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "renderer/Renderer.h"
 #include "renderer/TerrainRenderer.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/Simulation2.h"
 
 #include <algorithm>
 
 // TODO: Currently each decal is a separate CDecalRData. We might want to use
 // lots of decals for special effects like shadows, footprints, etc, in which
 // case we should probably redesign this to batch them all together for more
 // efficient rendering.
 
 namespace
 {
 
 struct SDecalBatch
 {
 	CDecalRData* decal;
 	CStrIntern shaderEffect;
 	CShaderDefines shaderDefines;
 	CVertexBuffer::VBChunk* vertices;
 	CVertexBuffer::VBChunk* indices;
 };
 
 struct SDecalBatchComparator
 {
 	bool operator()(const SDecalBatch& lhs, const SDecalBatch& rhs) const
 	{
 		if (lhs.shaderEffect != rhs.shaderEffect)
 			return lhs.shaderEffect < rhs.shaderEffect;
 		if (lhs.shaderDefines != rhs.shaderDefines)
 			return lhs.shaderDefines < rhs.shaderDefines;
 		const CMaterial& lhsMaterial = lhs.decal->GetDecal()->m_Decal.m_Material;
 		const CMaterial& rhsMaterial = rhs.decal->GetDecal()->m_Decal.m_Material;
 		if (lhsMaterial.GetDiffuseTexture() != rhsMaterial.GetDiffuseTexture())
 			return lhsMaterial.GetDiffuseTexture() < rhsMaterial.GetDiffuseTexture();
 		if (lhs.vertices->m_Owner != rhs.vertices->m_Owner)
 			return lhs.vertices->m_Owner < rhs.vertices->m_Owner;
 		if (lhs.indices->m_Owner != rhs.indices->m_Owner)
 			return lhs.indices->m_Owner < rhs.indices->m_Owner;
 		return lhs.decal < rhs.decal;
 	}
 };
 
 } // anonymous namespace
 
 CDecalRData::CDecalRData(CModelDecal* decal, CSimulation2* simulation)
 	: m_Decal(decal), m_Simulation(simulation)
 {
 	BuildVertexData();
 }
 
 CDecalRData::~CDecalRData() = default;
 
 void CDecalRData::Update(CSimulation2* simulation)
 {
 	m_Simulation = simulation;
 	if (m_UpdateFlags != 0)
 	{
 		BuildVertexData();
 		m_UpdateFlags = 0;
 	}
 }
 
 void CDecalRData::RenderDecals(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const std::vector<CDecalRData*>& decals, const CShaderDefines& context, ShadowMap* shadow)
 {
 	PROFILE3("render terrain decals");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain decals");
 
 	using Arena = Allocators::DynamicArena<256 * KiB>;
 
 	Arena arena;
 
 	using Batches = std::vector<SDecalBatch, ProxyAllocator<SDecalBatch, Arena>>;
 	Batches batches((Batches::allocator_type(arena)));
 	batches.reserve(decals.size());
 
 	CShaderDefines contextDecal = context;
 	contextDecal.Add(str_DECAL, str_1);
 
 	for (CDecalRData* decal : decals)
 	{
 		CMaterial& material = decal->m_Decal->m_Decal.m_Material;
 
 		if (material.GetShaderEffect().empty())
 		{
 			LOGERROR("Terrain renderer failed to load shader effect.\n");
 			continue;
 		}
 
 		if (material.GetSamplers().empty() || !decal->m_VBDecals || !decal->m_VBDecalsIndices)
 			continue;
 
 		SDecalBatch batch;
 		batch.decal = decal;
 		batch.shaderEffect = material.GetShaderEffect();
 		batch.shaderDefines = material.GetShaderDefines();
 		batch.vertices = decal->m_VBDecals.Get();
 		batch.indices = decal->m_VBDecalsIndices.Get();
 
 		batches.emplace_back(std::move(batch));
 	}
 
 	if (batches.empty())
 		return;
 
 	std::sort(batches.begin(), batches.end(), SDecalBatchComparator());
 
 	CVertexBuffer* lastIB = nullptr;
 	for (auto itTechBegin = batches.begin(), itTechEnd = batches.begin(); itTechBegin != batches.end(); itTechBegin = itTechEnd)
 	{
 		while (itTechEnd != batches.end() &&
 			itTechBegin->shaderEffect == itTechEnd->shaderEffect &&
 			itTechBegin->shaderDefines == itTechEnd->shaderDefines)
 		{
 			++itTechEnd;
 		}
 
 		CShaderDefines defines = contextDecal;
 		defines.SetMany(itTechBegin->shaderDefines);
 		CShaderTechniquePtr techBase = g_Renderer.GetShaderManager().LoadEffect(
 			itTechBegin->shaderEffect, defines);
 		if (!techBase)
 		{
 			LOGERROR("Terrain renderer failed to load shader effect (%s)\n",
 				itTechBegin->shaderEffect.c_str());
 			continue;
 		}
 
 		const int numPasses = techBase->GetNumPasses();
 		for (int pass = 0; pass < numPasses; ++pass)
 		{
 			Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 				techBase->GetGraphicsPipelineStateDesc(pass);
 			pipelineStateDesc.blendState.enabled = true;
 			pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::SRC_ALPHA;
 			pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 			pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 				Renderer::Backend::BlendOp::ADD;
 			pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 			deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 			deviceCommandContext->BeginPass();
 
 			Renderer::Backend::IShaderProgram* shader = techBase->GetShader(pass);
 			TerrainRenderer::PrepareShader(deviceCommandContext, shader, shadow);
 
 			CColor shadingColor(1.0f, 1.0f, 1.0f, 1.0f);
 			const int32_t shadingColorBindingSlot =
 				shader->GetBindingSlot(str_shadingColor);
 			deviceCommandContext->SetUniform(
 				shadingColorBindingSlot, shadingColor.AsFloatArray());
 
 			CShaderUniforms currentStaticUniforms;
 
 			CVertexBuffer* lastVB = nullptr;
 			for (auto itDecal = itTechBegin; itDecal != itTechEnd; ++itDecal)
 			{
 				SDecalBatch& batch = *itDecal;
 				CDecalRData* decal = batch.decal;
 				CMaterial& material = decal->m_Decal->m_Decal.m_Material;
 
 				const CMaterial::SamplersVector& samplers = material.GetSamplers();
 				for (const CMaterial::TextureSampler& sampler : samplers)
 					sampler.Sampler->UploadBackendTextureIfNeeded(deviceCommandContext);
 				for (const CMaterial::TextureSampler& sampler : samplers)
 				{
 					deviceCommandContext->SetTexture(
 						shader->GetBindingSlot(sampler.Name),
 						sampler.Sampler->GetBackendTexture());
 				}
 
 				if (currentStaticUniforms != material.GetStaticUniforms())
 				{
 					currentStaticUniforms = material.GetStaticUniforms();
 					material.GetStaticUniforms().BindUniforms(deviceCommandContext, shader);
 				}
 
 				// TODO: Need to handle floating decals correctly. In particular, we need
 				// to render non-floating before water and floating after water (to get
 				// the blending right), and we also need to apply the correct lighting in
 				// each case, which doesn't really seem possible with the current
 				// TerrainRenderer.
 				// Also, need to mark the decals as dirty when water height changes.
 
 				//	m_Decal->GetBounds().Render();
 
 				if (shadingColor != decal->m_Decal->GetShadingColor())
 				{
 					shadingColor = decal->m_Decal->GetShadingColor();
 					deviceCommandContext->SetUniform(
 						shadingColorBindingSlot, shadingColor.AsFloatArray());
 				}
 
 				if (lastVB != batch.vertices->m_Owner)
 				{
 					lastVB = batch.vertices->m_Owner;
-
-					batch.vertices->m_Owner->UploadIfNeeded(deviceCommandContext);
+					ENSURE(!lastVB->GetBuffer()->IsDynamic());
 
 					const uint32_t stride = sizeof(SDecalVertex);
 
 					deviceCommandContext->SetVertexAttributeFormat(
 						Renderer::Backend::VertexAttributeStream::POSITION,
 						Renderer::Backend::Format::R32G32B32_SFLOAT,
 						offsetof(SDecalVertex, m_Position), stride,
 						Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 					deviceCommandContext->SetVertexAttributeFormat(
 						Renderer::Backend::VertexAttributeStream::NORMAL,
 						Renderer::Backend::Format::R32G32B32_SFLOAT,
 						offsetof(SDecalVertex, m_Normal), stride,
 						Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 					deviceCommandContext->SetVertexAttributeFormat(
 						Renderer::Backend::VertexAttributeStream::UV0,
 						Renderer::Backend::Format::R32G32_SFLOAT,
 						offsetof(SDecalVertex, m_UV), stride,
 						Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 					deviceCommandContext->SetVertexBuffer(
 						0, batch.vertices->m_Owner->GetBuffer(), 0);
 				}
 
 				if (lastIB != batch.indices->m_Owner)
 				{
 					lastIB = batch.indices->m_Owner;
-					batch.indices->m_Owner->UploadIfNeeded(deviceCommandContext);
+					ENSURE(!lastIB->GetBuffer()->IsDynamic());
 					deviceCommandContext->SetIndexBuffer(batch.indices->m_Owner->GetBuffer());
 				}
 
 				deviceCommandContext->DrawIndexed(batch.indices->m_Index, batch.indices->m_Count, 0);
 
 				// bump stats
 				g_Renderer.m_Stats.m_DrawCalls++;
 				g_Renderer.m_Stats.m_TerrainTris += batch.indices->m_Count / 3;
 			}
 
 			deviceCommandContext->EndPass();
 		}
 	}
 }
 
 void CDecalRData::BuildVertexData()
 {
 	PROFILE("decal build");
 
 	const SDecal& decal = m_Decal->m_Decal;
 
 	// TODO: Currently this constructs an axis-aligned bounding rectangle around
 	// the decal. It would be more efficient for rendering if we excluded tiles
 	// that are outside the (non-axis-aligned) decal rectangle.
 
 	ssize_t i0, j0, i1, j1;
 	m_Decal->CalcVertexExtents(i0, j0, i1, j1);
 	// Currently CalcVertexExtents might return empty rectangle, that means
 	// we can't render it.
 	if (i1 <= i0 || j1 <= j0)
 	{
 		// We have nothing to render.
 		m_VBDecals.Reset();
 		m_VBDecalsIndices.Reset();
 		return;
 	}
 
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*m_Simulation, SYSTEM_ENTITY);
 
 	std::vector<SDecalVertex> vertices((i1 - i0 + 1) * (j1 - j0 + 1));
 
 	for (ssize_t j = j0, idx = 0; j <= j1; ++j)
 	{
 		for (ssize_t i = i0; i <= i1; ++i, ++idx)
 		{
 			SDecalVertex& vertex = vertices[idx];
 			m_Decal->m_Terrain->CalcPosition(i, j, vertex.m_Position);
 
 			if (decal.m_Floating && cmpWaterManager)
 			{
 				vertex.m_Position.Y = std::max(
 					vertex.m_Position.Y,
 					cmpWaterManager->GetExactWaterLevel(vertex.m_Position.X, vertex.m_Position.Z));
 			}
 
 			m_Decal->m_Terrain->CalcNormal(i, j, vertex.m_Normal);
 
 			// Map from world space back into decal texture space.
 			CVector3D inv = m_Decal->GetInvTransform().Transform(vertex.m_Position);
 			vertex.m_UV.X = 0.5f + (inv.X - decal.m_OffsetX) / decal.m_SizeX;
 			// Flip V to match our texture convention.
 			vertex.m_UV.Y = 0.5f - (inv.Z - decal.m_OffsetZ) / decal.m_SizeZ;
 		}
 	}
 
 	if (!m_VBDecals || m_VBDecals->m_Count != vertices.size())
 	{
 		m_VBDecals = g_VBMan.AllocateChunk(
 			sizeof(SDecalVertex), vertices.size(),
 			Renderer::Backend::IBuffer::Type::VERTEX, false);
 	}
 	m_VBDecals->m_Owner->UpdateChunkVertices(m_VBDecals.Get(), vertices.data());
 
 	std::vector<u16> indices((i1 - i0) * (j1 - j0) * 6);
 
 	const ssize_t w = i1 - i0 + 1;
 	auto itIdx = indices.begin();
 	const size_t base = m_VBDecals->m_Index;
 	for (ssize_t dj = 0; dj < j1 - j0; ++dj)
 	{
 		for (ssize_t di = 0; di < i1 - i0; ++di)
 		{
 			const bool dir = m_Decal->m_Terrain->GetTriangulationDir(i0 + di, j0 + dj);
 			if (dir)
 			{
 				*itIdx++ = u16(((dj + 0) * w + (di + 0)) + base);
 				*itIdx++ = u16(((dj + 0) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 0)) + base);
 
 				*itIdx++ = u16(((dj + 0) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 0)) + base);
 			}
 			else
 			{
 				*itIdx++ = u16(((dj + 0) * w + (di + 0)) + base);
 				*itIdx++ = u16(((dj + 0) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 1)) + base);
 
 				*itIdx++ = u16(((dj + 1) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 0)) + base);
 				*itIdx++ = u16(((dj + 0) * w + (di + 0)) + base);
 			}
 		}
 	}
 
 	// Construct vertex buffer.
 	if (!m_VBDecalsIndices || m_VBDecalsIndices->m_Count != indices.size())
 	{
 		m_VBDecalsIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), indices.size(),
 			Renderer::Backend::IBuffer::Type::INDEX, false);
 	}
 	m_VBDecalsIndices->m_Owner->UpdateChunkVertices(m_VBDecalsIndices.Get(), indices.data());
 }
Index: ps/trunk/source/renderer/HWLightingModelRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/HWLightingModelRenderer.cpp	(revision 27181)
+++ ps/trunk/source/renderer/HWLightingModelRenderer.cpp	(revision 27182)
@@ -1,257 +1,255 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/HWLightingModelRenderer.h"
 
 #include "graphics/Color.h"
 #include "graphics/LightEnv.h"
 #include "graphics/Model.h"
 #include "graphics/ModelDef.h"
 #include "graphics/ShaderProgram.h"
 #include "lib/bits.h"
 #include "lib/sysdep/rtl.h"
 #include "maths/Vector3D.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderModifiers.h"
 #include "renderer/VertexArray.h"
 
 
 struct ShaderModelDef : public CModelDefRPrivate
 {
 	/// Indices are the same for all models, so share them
 	VertexIndexArray m_IndexArray;
 
 	/// Static per-CModelDef vertex array
 	VertexArray m_Array;
 
 	/// The number of UVs is determined by the model
 	std::vector<VertexArray::Attribute> m_UVs;
 
 	ShaderModelDef(const CModelDefPtr& mdef);
 };
 
 
 ShaderModelDef::ShaderModelDef(const CModelDefPtr& mdef)
 	: m_IndexArray(false),
 	m_Array(Renderer::Backend::IBuffer::Type::VERTEX, false)
 {
 	size_t numVertices = mdef->GetNumVertices();
 
 	m_UVs.resize(mdef->GetNumUVsPerVertex());
 	for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); ++i)
 	{
 		m_UVs[i].format = Renderer::Backend::Format::R32G32_SFLOAT;
 		m_Array.AddAttribute(&m_UVs[i]);
 	}
 
 	m_Array.SetNumberOfVertices(numVertices);
 	m_Array.Layout();
 
 	for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); ++i)
 	{
 		VertexArrayIterator<float[2]> UVit = m_UVs[i].GetIterator<float[2]>();
 		ModelRenderer::BuildUV(mdef, UVit, i);
 	}
 
 	m_Array.Upload();
 	m_Array.FreeBackingStore();
 
 	m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces()*3);
 	m_IndexArray.Layout();
 	ModelRenderer::BuildIndices(mdef, m_IndexArray.GetIterator());
 	m_IndexArray.Upload();
 	m_IndexArray.FreeBackingStore();
 }
 
 
 struct ShaderModel : public CModelRData
 {
 	/// Dynamic per-CModel vertex array
 	VertexArray m_Array;
 
 	/// Position and normals/lighting are recalculated on CPU every frame
 	VertexArray::Attribute m_Position;
 	VertexArray::Attribute m_Normal;
 
 	ShaderModel(const void* key)
 		: CModelRData(key),
 		m_Array(Renderer::Backend::IBuffer::Type::VERTEX, true)
 	{}
 };
 
 
 struct ShaderModelVertexRenderer::ShaderModelRendererInternals
 {
 	/// Previously prepared modeldef
 	ShaderModelDef* shadermodeldef;
 };
 
 
 // Construction and Destruction
 ShaderModelVertexRenderer::ShaderModelVertexRenderer()
 {
 	m = new ShaderModelRendererInternals;
 	m->shadermodeldef = nullptr;
 }
 
 ShaderModelVertexRenderer::~ShaderModelVertexRenderer()
 {
 	delete m;
 }
 
 
 // Build model data (and modeldef data if necessary)
 CModelRData* ShaderModelVertexRenderer::CreateModelData(const void* key, CModel* model)
 {
 	CModelDefPtr mdef = model->GetModelDef();
 	ShaderModelDef* shadermodeldef = (ShaderModelDef*)mdef->GetRenderData(m);
 
 	if (!shadermodeldef)
 	{
 		shadermodeldef = new ShaderModelDef(mdef);
 		mdef->SetRenderData(m, shadermodeldef);
 	}
 
 	// Build the per-model data
 	ShaderModel* shadermodel = new ShaderModel(key);
 
 	// Positions and normals must be 16-byte aligned for SSE writes.
 
 	shadermodel->m_Position.format = Renderer::Backend::Format::R32G32B32A32_SFLOAT;
 	shadermodel->m_Array.AddAttribute(&shadermodel->m_Position);
 
 	shadermodel->m_Normal.format = Renderer::Backend::Format::R32G32B32A32_SFLOAT;
 	shadermodel->m_Array.AddAttribute(&shadermodel->m_Normal);
 
 	shadermodel->m_Array.SetNumberOfVertices(mdef->GetNumVertices());
 	shadermodel->m_Array.Layout();
 
 	// Verify alignment
 	ENSURE(shadermodel->m_Position.offset % 16 == 0);
 	ENSURE(shadermodel->m_Normal.offset % 16 == 0);
 	ENSURE(shadermodel->m_Array.GetStride() % 16 == 0);
 
 	return shadermodel;
 }
 
 
 // Fill in and upload dynamic vertex array
 void ShaderModelVertexRenderer::UpdateModelData(CModel* model, CModelRData* data, int updateflags)
 {
 	ShaderModel* shadermodel = static_cast<ShaderModel*>(data);
 
 	if (updateflags & RENDERDATA_UPDATE_VERTICES)
 	{
 		// build vertices
 		VertexArrayIterator<CVector3D> Position = shadermodel->m_Position.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector3D> Normal = shadermodel->m_Normal.GetIterator<CVector3D>();
 
 		ModelRenderer::BuildPositionAndNormals(model, Position, Normal);
 
 		// upload everything to vertex buffer
 		shadermodel->m_Array.Upload();
 	}
 
 	shadermodel->m_Array.PrepareForRendering();
 }
 
-
-// Setup one rendering pass
-void ShaderModelVertexRenderer::BeginPass()
+void ShaderModelVertexRenderer::UploadModelData(
+	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+	CModel* model, CModelRData* data)
 {
-}
+	ShaderModelDef* shaderModelDef = static_cast<ShaderModelDef*>(model->GetModelDef()->GetRenderData(m));
+	ENSURE(shaderModelDef);
 
-// Cleanup one rendering pass
-void ShaderModelVertexRenderer::EndPass(
-	Renderer::Backend::IDeviceCommandContext* UNUSED(deviceCommandContext))
-{
+	shaderModelDef->m_Array.UploadIfNeeded(deviceCommandContext);
+	shaderModelDef->m_IndexArray.UploadIfNeeded(deviceCommandContext);
+
+	ShaderModel* shaderModel = static_cast<ShaderModel*>(data);
+
+	shaderModel->m_Array.UploadIfNeeded(deviceCommandContext);
 }
 
 // Prepare UV coordinates for this modeldef
 void ShaderModelVertexRenderer::PrepareModelDef(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CModelDef& def)
 {
 	m->shadermodeldef = (ShaderModelDef*)def.GetRenderData(m);
 
 	ENSURE(m->shadermodeldef);
 
-	m->shadermodeldef->m_Array.UploadIfNeeded(deviceCommandContext);
-
 	const uint32_t stride = m->shadermodeldef->m_Array.GetStride();
 	const uint32_t firstVertexOffset = m->shadermodeldef->m_Array.GetOffset() * stride;
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::UV0,
 		m->shadermodeldef->m_UVs[0].format,
 		m->shadermodeldef->m_UVs[0].offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	if (def.GetNumUVsPerVertex() >= 2)
 	{
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV1,
 			m->shadermodeldef->m_UVs[1].format,
 			m->shadermodeldef->m_UVs[1].offset, stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	}
 
 	deviceCommandContext->SetVertexBuffer(
 		0, m->shadermodeldef->m_Array.GetBuffer(), firstVertexOffset);
 }
 
 // Render one model
 void ShaderModelVertexRenderer::RenderModel(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	Renderer::Backend::IShaderProgram* UNUSED(shader), CModel* model, CModelRData* data)
 {
 	const CModelDefPtr& mdldef = model->GetModelDef();
 	ShaderModel* shadermodel = static_cast<ShaderModel*>(data);
 
-	shadermodel->m_Array.UploadIfNeeded(deviceCommandContext);
-	m->shadermodeldef->m_IndexArray.UploadIfNeeded(deviceCommandContext);
-
 	const uint32_t stride = shadermodel->m_Array.GetStride();
 	const uint32_t firstVertexOffset = shadermodel->m_Array.GetOffset() * stride;
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		shadermodel->m_Position.offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 1);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::NORMAL,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		shadermodel->m_Normal.offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 1);
 
 	deviceCommandContext->SetVertexBuffer(
 		1, shadermodel->m_Array.GetBuffer(), firstVertexOffset);
 	deviceCommandContext->SetIndexBuffer(m->shadermodeldef->m_IndexArray.GetBuffer());
 
 	// Render the lot.
 	const size_t numberOfFaces = mdldef->GetNumFaces();
 
 	deviceCommandContext->DrawIndexedInRange(
 		m->shadermodeldef->m_IndexArray.GetOffset(), numberOfFaces * 3, 0, mdldef->GetNumVertices() - 1);
 
 	// Bump stats.
 	g_Renderer.m_Stats.m_DrawCalls++;
 	g_Renderer.m_Stats.m_ModelTris += numberOfFaces;
 }
 
Index: ps/trunk/source/renderer/HWLightingModelRenderer.h
===================================================================
--- ps/trunk/source/renderer/HWLightingModelRenderer.h	(revision 27181)
+++ ps/trunk/source/renderer/HWLightingModelRenderer.h	(revision 27182)
@@ -1,58 +1,58 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
  * ModelVertexRenderer that transforms models on the CPU but performs
  * lighting in a vertex shader.
  */
 
 #ifndef INCLUDED_HWLIGHTINGMODELRENDERER
 #define INCLUDED_HWLIGHTINGMODELRENDERER
 
 #include "renderer/ModelVertexRenderer.h"
 
 /**
  * Render animated models using a ShaderRenderModifier.
  * This computes and binds per-vertex data; the modifier is responsible
  * for setting any shader uniforms etc.
  */
 class ShaderModelVertexRenderer : public ModelVertexRenderer
 {
 public:
 	ShaderModelVertexRenderer();
 	~ShaderModelVertexRenderer();
 
 	CModelRData* CreateModelData(const void* key, CModel* model) override;
 	void UpdateModelData(CModel* model, CModelRData* data, int updateflags) override;
 
-	void BeginPass() override;
-	void EndPass(
-		Renderer::Backend::IDeviceCommandContext* deviceCommandContext) override;
+	void UploadModelData(
+		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+		CModel* model, CModelRData* data) override;
 	void PrepareModelDef(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const CModelDef& def) override;
 	void RenderModel(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		Renderer::Backend::IShaderProgram* shader, CModel* model, CModelRData* data) override;
 
 protected:
 	struct ShaderModelRendererInternals;
 	ShaderModelRendererInternals* m;
 };
 
 
 #endif // INCLUDED_HWLIGHTINGMODELRENDERER
Index: ps/trunk/source/renderer/InstancingModelRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/InstancingModelRenderer.cpp	(revision 27181)
+++ ps/trunk/source/renderer/InstancingModelRenderer.cpp	(revision 27182)
@@ -1,402 +1,394 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 #include "renderer/InstancingModelRenderer.h"
 
 #include "graphics/Color.h"
 #include "graphics/LightEnv.h"
 #include "graphics/Model.h"
 #include "graphics/ModelDef.h"
 #include "maths/Vector3D.h"
 #include "maths/Vector4D.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderModifiers.h"
 #include "renderer/VertexArray.h"
 #include "third_party/mikktspace/weldmesh.h"
 
 
 struct IModelDef : public CModelDefRPrivate
 {
 	/// Static per-CModel vertex array
 	VertexArray m_Array;
 
 	/// Position and normals are static
 	VertexArray::Attribute m_Position;
 	VertexArray::Attribute m_Normal;
 	VertexArray::Attribute m_Tangent;
 	VertexArray::Attribute m_BlendJoints; // valid iff gpuSkinning == true
 	VertexArray::Attribute m_BlendWeights; // valid iff gpuSkinning == true
 
 	/// The number of UVs is determined by the model
 	std::vector<VertexArray::Attribute> m_UVs;
 
 	/// Indices are the same for all models, so share them
 	VertexIndexArray m_IndexArray;
 
 	IModelDef(const CModelDefPtr& mdef, bool gpuSkinning, bool calculateTangents);
 };
 
 
 IModelDef::IModelDef(const CModelDefPtr& mdef, bool gpuSkinning, bool calculateTangents)
 	: m_IndexArray(false), m_Array(Renderer::Backend::IBuffer::Type::VERTEX, false)
 {
 	size_t numVertices = mdef->GetNumVertices();
 
 	m_Position.format = Renderer::Backend::Format::R32G32B32_SFLOAT;
 	m_Array.AddAttribute(&m_Position);
 
 	m_Normal.format = Renderer::Backend::Format::R32G32B32_SFLOAT;
 	m_Array.AddAttribute(&m_Normal);
 
 	m_UVs.resize(mdef->GetNumUVsPerVertex());
 	for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); i++)
 	{
 		m_UVs[i].format = Renderer::Backend::Format::R32G32_SFLOAT;
 		m_Array.AddAttribute(&m_UVs[i]);
 	}
 
 	if (gpuSkinning)
 	{
 		// We can't use a lot of bones because it costs uniform memory. Recommended
 		// number of bones per model is 32.
 		// Add 1 to NumBones because of the special 'root' bone.
 		if (mdef->GetNumBones() + 1 > 64)
 			LOGERROR("Model '%s' has too many bones %zu/64", mdef->GetName().string8().c_str(), mdef->GetNumBones() + 1);
 		ENSURE(mdef->GetNumBones() + 1 <= 64);
 
 		m_BlendJoints.format = Renderer::Backend::Format::R8G8B8A8_UINT;
 		m_Array.AddAttribute(&m_BlendJoints);
 
 		m_BlendWeights.format = Renderer::Backend::Format::R8G8B8A8_UNORM;
 		m_Array.AddAttribute(&m_BlendWeights);
 	}
 
 	if (calculateTangents)
 	{
 		// Generate tangents for the geometry:-
 
 		m_Tangent.format = Renderer::Backend::Format::R32G32B32A32_SFLOAT;
 		m_Array.AddAttribute(&m_Tangent);
 
 		// floats per vertex; position + normal + tangent + UV*sets [+ GPUskinning]
 		int numVertexAttrs = 3 + 3 + 4 + 2 * mdef->GetNumUVsPerVertex();
 		if (gpuSkinning)
 		{
 			numVertexAttrs += 8;
 		}
 
 		// the tangent generation can increase the number of vertices temporarily
 		// so reserve a bit more memory to avoid reallocations in GenTangents (in most cases)
 		std::vector<float> newVertices;
 		newVertices.reserve(numVertexAttrs * numVertices * 2);
 
 		// Generate the tangents
 		ModelRenderer::GenTangents(mdef, newVertices, gpuSkinning);
 
 		// how many vertices do we have after generating tangents?
 		int newNumVert = newVertices.size() / numVertexAttrs;
 
 		std::vector<int> remapTable(newNumVert);
 		std::vector<float> vertexDataOut(newNumVert * numVertexAttrs);
 
 		// re-weld the mesh to remove duplicated vertices
 		int numVertices2 = WeldMesh(&remapTable[0], &vertexDataOut[0],
 					&newVertices[0], newNumVert, numVertexAttrs);
 
 		// Copy the model data to graphics memory:-
 
 		m_Array.SetNumberOfVertices(numVertices2);
 		m_Array.Layout();
 
 		VertexArrayIterator<CVector3D> Position = m_Position.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector3D> Normal = m_Normal.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector4D> Tangent = m_Tangent.GetIterator<CVector4D>();
 
 		VertexArrayIterator<u8[4]> BlendJoints;
 		VertexArrayIterator<u8[4]> BlendWeights;
 		if (gpuSkinning)
 		{
 			BlendJoints = m_BlendJoints.GetIterator<u8[4]>();
 			BlendWeights = m_BlendWeights.GetIterator<u8[4]>();
 		}
 
 		// copy everything into the vertex array
 		for (int i = 0; i < numVertices2; i++)
 		{
 			int q = numVertexAttrs * i;
 
 			Position[i] = CVector3D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2]);
 			q += 3;
 
 			Normal[i] = CVector3D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2]);
 			q += 3;
 
 			Tangent[i] = CVector4D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2],
 					vertexDataOut[q + 3]);
 			q += 4;
 
 			if (gpuSkinning)
 			{
 				for (size_t j = 0; j < 4; ++j)
 				{
 					BlendJoints[i][j] = (u8)vertexDataOut[q + 0 + 2 * j];
 					BlendWeights[i][j] = (u8)vertexDataOut[q + 1 + 2 * j];
 				}
 				q += 8;
 			}
 
 			for (size_t j = 0; j < mdef->GetNumUVsPerVertex(); j++)
 			{
 				VertexArrayIterator<float[2]> UVit = m_UVs[j].GetIterator<float[2]>();
 				UVit[i][0] = vertexDataOut[q + 0 + 2 * j];
 				UVit[i][1] = vertexDataOut[q + 1 + 2 * j];
 			}
 		}
 
 		// upload vertex data
 		m_Array.Upload();
 		m_Array.FreeBackingStore();
 
 		m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces() * 3);
 		m_IndexArray.Layout();
 
 		VertexArrayIterator<u16> Indices = m_IndexArray.GetIterator();
 
 		size_t idxidx = 0;
 
 		// reindex geometry and upload index
 		for (size_t j = 0; j < mdef->GetNumFaces(); ++j)
 		{
 			Indices[idxidx++] = remapTable[j * 3 + 0];
 			Indices[idxidx++] = remapTable[j * 3 + 1];
 			Indices[idxidx++] = remapTable[j * 3 + 2];
 		}
 
 		m_IndexArray.Upload();
 		m_IndexArray.FreeBackingStore();
 	}
 	else
 	{
 		// Upload model without calculating tangents:-
 
 		m_Array.SetNumberOfVertices(numVertices);
 		m_Array.Layout();
 
 		VertexArrayIterator<CVector3D> Position = m_Position.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector3D> Normal = m_Normal.GetIterator<CVector3D>();
 
 		ModelRenderer::CopyPositionAndNormals(mdef, Position, Normal);
 
 		for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); i++)
 		{
 			VertexArrayIterator<float[2]> UVit = m_UVs[i].GetIterator<float[2]>();
 			ModelRenderer::BuildUV(mdef, UVit, i);
 		}
 
 		if (gpuSkinning)
 		{
 			VertexArrayIterator<u8[4]> BlendJoints = m_BlendJoints.GetIterator<u8[4]>();
 			VertexArrayIterator<u8[4]> BlendWeights = m_BlendWeights.GetIterator<u8[4]>();
 			for (size_t i = 0; i < numVertices; ++i)
 			{
 				const SModelVertex& vtx = mdef->GetVertices()[i];
 				for (size_t j = 0; j < 4; ++j)
 				{
 					BlendJoints[i][j] = vtx.m_Blend.m_Bone[j];
 					BlendWeights[i][j] = (u8)(255.f * vtx.m_Blend.m_Weight[j]);
 				}
 			}
 		}
 
 		m_Array.Upload();
 		m_Array.FreeBackingStore();
 
 		m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces()*3);
 		m_IndexArray.Layout();
 		ModelRenderer::BuildIndices(mdef, m_IndexArray.GetIterator());
 		m_IndexArray.Upload();
 		m_IndexArray.FreeBackingStore();
 	}
 }
 
 
 /**
  * Private state of an InstancingModelRenderer.
  *
  * Every member has a default initializer so that a default-constructed
  * instance never holds indeterminate values.
  */
 struct InstancingModelRendererInternals
 {
 	/// If set, blend joints/weights are bound as vertex attributes and the
 	/// animated bone matrices are sent to the shader for every model.
 	bool gpuSkinning = false;
 
 	/// If set, the tangent attribute is bound when preparing a modeldef.
 	bool calculateTangents = false;
 
 	/// Previously prepared modeldef
 	IModelDef* imodeldef = nullptr;
 
 	/// Index base for imodeldef
 	u8* imodeldefIndexBase = nullptr;
 };
 
 
 // Construction and Destruction
 // Allocate the private state and record the configuration flags.
 // No modeldef is prepared yet, so both modeldef-related pointers start
 // out as null (imodeldefIndexBase used to be left uninitialized).
 InstancingModelRenderer::InstancingModelRenderer(bool gpuSkinning, bool calculateTangents)
 {
 	m = new InstancingModelRendererInternals;
 	m->gpuSkinning = gpuSkinning;
 	m->calculateTangents = calculateTangents;
 	m->imodeldef = nullptr;
 	m->imodeldefIndexBase = nullptr;
 }
 
 // Release the private state allocated by the constructor.
 InstancingModelRenderer::~InstancingModelRenderer()
 {
 	delete m;
 }
 
 
 // Build modeldef data if necessary - we have no per-CModel data.
 // The IModelDef is stored on the modeldef itself, keyed by our internals
 // pointer, so it is built at most once per modeldef for this renderer.
 // Returns a new CModelRData that only carries the given key.
 CModelRData* InstancingModelRenderer::CreateModelData(const void* key, CModel* model)
 {
 	CModelDefPtr mdef = model->GetModelDef();
 	IModelDef* imodeldef = (IModelDef*)mdef->GetRenderData(m);
 
 	// The model's skinning state has to match the mode this renderer
 	// was constructed with.
 	if (m->gpuSkinning)
  		ENSURE(model->IsSkinned());
 	else
 		ENSURE(!model->IsSkinned());
 
 	if (!imodeldef)
 	{
 		// First model using this modeldef with this renderer: build the
 		// render data and register it on the modeldef.
 		imodeldef = new IModelDef(mdef, m->gpuSkinning, m->calculateTangents);
 		mdef->SetRenderData(m, imodeldef);
 	}
 
 	return new CModelRData(key);
 }
 
 
 // Intentionally a no-op: all arguments are unused.
 void InstancingModelRenderer::UpdateModelData(CModel* UNUSED(model), CModelRData* UNUSED(data), int UNUSED(updateflags))
 {
 	// We have no per-CModel data
 }
 
-
-// Setup one rendering pass.
-void InstancingModelRenderer::BeginPass()
-{
-}
-
-// Cleanup rendering pass.
-void InstancingModelRenderer::EndPass(
-	Renderer::Backend::IDeviceCommandContext* UNUSED(deviceCommandContext))
+void InstancingModelRenderer::UploadModelData(
+	Renderer::Backend::IDeviceCommandContext* UNUSED(deviceCommandContext),
+	CModel* UNUSED(model), CModelRData* UNUSED(data))
 {
+	// Nothing to do: the data is uploaded once during creation, as we don't update it dynamically.
 }
 
 // Bind the geometry of the given modeldef on the command context: the index
 // buffer, the format of every vertex attribute stream in use (position,
 // normal, up to two UV sets, optional skinning data and tangents) and
 // finally the vertex buffer itself.
 // The looked-up IModelDef is remembered in m->imodeldef for RenderModel.
 void InstancingModelRenderer::PrepareModelDef(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CModelDef& def)
 {
 	m->imodeldef = (IModelDef*)def.GetRenderData(m);
-
 	ENSURE(m->imodeldef);
-	m->imodeldef->m_Array.UploadIfNeeded(deviceCommandContext);
-	m->imodeldef->m_IndexArray.UploadIfNeeded(deviceCommandContext);
 
 	deviceCommandContext->SetIndexBuffer(m->imodeldef->m_IndexArray.GetBuffer());
 
 	// All attributes share one vertex array, hence a common stride.
 	const uint32_t stride = m->imodeldef->m_Array.GetStride();
 	const uint32_t firstVertexOffset = m->imodeldef->m_Array.GetOffset() * stride;
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		m->imodeldef->m_Position.format,
 		m->imodeldef->m_Position.offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::NORMAL,
 		m->imodeldef->m_Normal.format,
 		m->imodeldef->m_Normal.offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 	// Only the first MAX_UV UV sets of the modeldef are bound (UV0, UV1);
 	// the streams from UV2 onwards are used for other data below.
 	constexpr size_t MAX_UV = 2;
 	for (size_t uv = 0; uv < std::min(MAX_UV, def.GetNumUVsPerVertex()); ++uv)
 	{
 		const Renderer::Backend::VertexAttributeStream stream =
 			static_cast<Renderer::Backend::VertexAttributeStream>(
 				static_cast<int>(Renderer::Backend::VertexAttributeStream::UV0) + uv);
 		deviceCommandContext->SetVertexAttributeFormat(
 			stream, m->imodeldef->m_UVs[uv].format,
 			m->imodeldef->m_UVs[uv].offset, stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	}
 
 	// GPU skinning requires extra attributes to compute positions/normals.
 	// Blend joints go to the UV2 stream and blend weights to UV3.
 	if (m->gpuSkinning)
 	{
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV2,
 			m->imodeldef->m_BlendJoints.format,
 			m->imodeldef->m_BlendJoints.offset, stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV3,
 			m->imodeldef->m_BlendWeights.format,
 			m->imodeldef->m_BlendWeights.offset, stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	}
 
 	// Tangents, when enabled, are passed through the UV4 stream.
 	if (m->calculateTangents)
 	{
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV4,
 			m->imodeldef->m_Tangent.format,
 			m->imodeldef->m_Tangent.offset, stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	}
 
 	deviceCommandContext->SetVertexBuffer(
 		0, m->imodeldef->m_Array.GetBuffer(), firstVertexOffset);
 }
 
 
 // Render one model.
 // Issues a single indexed draw call using the buffers of m->imodeldef, which
 // is set by PrepareModelDef; the per-model render data is unused.
 void InstancingModelRenderer::RenderModel(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	Renderer::Backend::IShaderProgram* shader, CModel* model, CModelRData* UNUSED(data))
 {
 	const CModelDefPtr& mdldef = model->GetModelDef();
 
 	if (m->gpuSkinning)
 	{
 		// Bind matrices for current animation state.
 		// Add 1 to NumBones because of the special 'root' bone.
 		// The matrices are passed as one flat span of floats starting at
 		// the first matrix.
 		deviceCommandContext->SetUniform(
 			shader->GetBindingSlot(str_skinBlendMatrices),
 			PS::span<const float>(
 				model->GetAnimatedBoneMatrices()[0]._data,
 				model->GetAnimatedBoneMatrices()[0].AsFloatArray().size() * (mdldef->GetNumBones() + 1)));
 	}
 
 	// Render the lot.
 	const size_t numberOfFaces = mdldef->GetNumFaces();
 
 	// Three indices per face, starting at the index array's offset.
 	deviceCommandContext->DrawIndexedInRange(
 		m->imodeldef->m_IndexArray.GetOffset(), numberOfFaces * 3, 0, m->imodeldef->m_Array.GetNumberOfVertices() - 1);
 
 	// Bump stats.
 	g_Renderer.m_Stats.m_DrawCalls++;
 	g_Renderer.m_Stats.m_ModelTris += numberOfFaces;
 }
Index: ps/trunk/source/renderer/InstancingModelRenderer.h
===================================================================
--- ps/trunk/source/renderer/InstancingModelRenderer.h	(revision 27181)
+++ ps/trunk/source/renderer/InstancingModelRenderer.h	(revision 27182)
@@ -1,58 +1,58 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
  * Special ModelVertexRenderer that only works for non-animated models,
  * but is very fast for instanced models.
  */
 
 #ifndef INCLUDED_INSTANCINGMODELRENDERER
 #define INCLUDED_INSTANCINGMODELRENDERER
 
 #include "renderer/ModelVertexRenderer.h"
 
 struct InstancingModelRendererInternals;
 
 /**
  * Render non-animated (but potentially moving) models using a ShaderRenderModifier.
  * This computes and binds per-vertex data; the modifier is responsible
  * for setting any shader uniforms etc (including the instancing transform).
  */
 class InstancingModelRenderer : public ModelVertexRenderer
 {
 public:
 	// gpuSkinning: bind skinning attributes and bone matrices when rendering.
 	// calculateTangents: bind the tangent attribute when preparing a modeldef.
 	InstancingModelRenderer(bool gpuSkinning, bool calculateTangents);
 	~InstancingModelRenderer();
 
 	// Implementations
 	// Builds the shared per-modeldef data on first use; there is no
 	// per-CModel data, so UpdateModelData does nothing.
 	CModelRData* CreateModelData(const void* key, CModel* model) override;
 	void UpdateModelData(CModel* model, CModelRData* data, int updateflags) override;
 
-	void BeginPass() override;
-	void EndPass(
-		Renderer::Backend::IDeviceCommandContext* deviceCommandContext) override;
+	void UploadModelData(
+		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+		CModel* model, CModelRData* data) override;
 	// Binds the buffers and vertex attribute formats of the given modeldef.
 	void PrepareModelDef(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const CModelDef& def) override;
 	// Draws one model using the most recently prepared modeldef.
 	void RenderModel(Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		Renderer::Backend::IShaderProgram* shader, CModel* model, CModelRData* data) override;
 
 protected:
 	// Private implementation data, allocated in the constructor and
 	// deleted in the destructor.
 	InstancingModelRendererInternals* m;
 };
 
 #endif // INCLUDED_INSTANCINGMODELRENDERER
Index: ps/trunk/source/renderer/ModelRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/ModelRenderer.cpp	(revision 27181)
+++ ps/trunk/source/renderer/ModelRenderer.cpp	(revision 27182)
@@ -1,764 +1,772 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "graphics/Color.h"
 #include "graphics/LightEnv.h"
 #include "graphics/Material.h"
 #include "graphics/Model.h"
 #include "graphics/ModelDef.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/TextureManager.h"
 #include "lib/allocators/DynamicArena.h"
 #include "lib/allocators/STLAllocators.h"
 #include "lib/hash.h"
 #include "maths/Vector3D.h"
 #include "maths/Vector4D.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Profile.h"
 #include "renderer/MikktspaceWrap.h"
 #include "renderer/ModelRenderer.h"
 #include "renderer/ModelVertexRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderModifiers.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/SkyManager.h"
 #include "renderer/TimeManager.h"
 #include "renderer/WaterManager.h"
 
 ///////////////////////////////////////////////////////////////////////////////////////////////
 // ModelRenderer implementation
 
 // Currently there is nothing to initialise.
 void ModelRenderer::Init()
 {
 }
 
 // Helper function to copy object-space position and normal vectors into arrays.
 // The vectors are taken from the modeldef's vertices as they are; no
 // transformation is applied. Both iterators are written for every vertex
 // of the modeldef.
 void ModelRenderer::CopyPositionAndNormals(
 		const CModelDefPtr& mdef,
 		const VertexArrayIterator<CVector3D>& Position,
 		const VertexArrayIterator<CVector3D>& Normal)
 {
 	size_t numVertices = mdef->GetNumVertices();
 	SModelVertex* vertices = mdef->GetVertices();
 
 	for (size_t j = 0; j < numVertices; ++j)
 	{
 		Position[j] = vertices[j].m_Coords;
 		Normal[j] = vertices[j].m_Norm;
 	}
 }
 
 // Helper function to transform position and normal vectors into world-space.
 // Skinned models are skinned with their animated bone matrices; other models
 // are transformed with the model's transform (positions) and the transposed
 // rotation of its inverse transform (normals).
 // If a skinned model turns out to have unboned vertex data, an error is
 // logged and the output arrays are left untouched.
 void ModelRenderer::BuildPositionAndNormals(
 		CModel* model,
 		const VertexArrayIterator<CVector3D>& Position,
 		const VertexArrayIterator<CVector3D>& Normal)
 {
 	CModelDefPtr mdef = model->GetModelDef();
 	size_t numVertices = mdef->GetNumVertices();
 	SModelVertex* vertices = mdef->GetVertices();
 
 	if (model->IsSkinned())
 	{
 		// boned model - calculate skinned vertex positions/normals
 
 		// Avoid the noisy warnings that occur inside SkinPoint/SkinNormal in
 		// some broken situations
 		// (only the first bone index of the first vertex is checked)
 		if (numVertices && vertices[0].m_Blend.m_Bone[0] == 0xff)
 		{
 			LOGERROR("Model %s is boned with unboned animation", mdef->GetName().string8());
 			return;
 		}
 
 		CModelDef::SkinPointsAndNormals(numVertices, Position, Normal, vertices, mdef->GetBlendIndices(), model->GetAnimatedBoneMatrices());
 	}
 	else
 	{
 		PROFILE("software transform");
 		// just copy regular positions, transform normals to world space
 		const CMatrix3D& transform = model->GetTransform();
 		const CMatrix3D& invtransform = model->GetInvTransform();
 		for (size_t j = 0; j < numVertices; ++j)
 		{
 			transform.Transform(vertices[j].m_Coords, Position[j]);
 			invtransform.RotateTransposed(vertices[j].m_Norm, Normal[j]);
 		}
 	}
 }
 
 
 // Helper function for lighting.
 // For every vertex, evaluates the scene's light environment for the given
 // normal, multiplies the result component-wise by the model's shading color
 // and stores it as a 4-ubyte color.
 void ModelRenderer::BuildColor4ub(
 		CModel* model,
 		const VertexArrayIterator<CVector3D>& Normal,
 		const VertexArrayIterator<SColor4ub>& Color)
 {
 	PROFILE("lighting vertices");
 
 	CModelDefPtr mdef = model->GetModelDef();
 	size_t numVertices = mdef->GetNumVertices();
 	const CLightEnv& lightEnv = g_Renderer.GetSceneRenderer().GetLightEnv();
 	CColor shadingColor = model->GetShadingColor();
 
 	for (size_t j = 0; j < numVertices; ++j)
 	{
 		RGBColor tempcolor = lightEnv.EvaluateUnitScaled(Normal[j]);
 		tempcolor.X *= shadingColor.r;
 		tempcolor.Y *= shadingColor.g;
 		tempcolor.Z *= shadingColor.b;
 		Color[j] = ConvertRGBColorTo4ub(tempcolor);
 	}
 }
 
 
 // Generate tangents for the modeldef by running the MikkTSpace wrapper,
 // which is handed newVertices as its (non-const) vertex data container.
 void ModelRenderer::GenTangents(const CModelDefPtr& mdef, std::vector<float>& newVertices, bool gpuSkinning)
 {
 	MikkTSpace ms(mdef, newVertices, gpuSkinning);
 	ms.Generate();
 }
 
 
 // Copy the UV coordinates of one UV set into the given vertex array iterator.
 // The modeldef stores the UV sets of a vertex next to each other, so the
 // entry for vertex j and set UVset is at j * numberOfUVPerVertex + UVset.
 // The V coordinate is flipped (v -> 1 - v) while copying.
 void ModelRenderer::BuildUV(
 		const CModelDefPtr& mdef,
 		const VertexArrayIterator<float[2]>& UV,
 		int UVset)
 {
 	const size_t numVertices = mdef->GetNumVertices();
 	const size_t numberOfUVPerVertex = mdef->GetNumUVsPerVertex();
 
 	for (size_t j = 0; j < numVertices; ++j)
 	{
 		const CVector2D& uv = mdef->GetUVCoordinates()[j * numberOfUVPerVertex + UVset];
 		UV[j][0] = uv.X;
 		// Float literal: avoids promoting to double and implicitly
 		// narrowing the result back to float.
 		UV[j][1] = 1.0f - uv.Y;
 	}
 }
 
 
 // Build default indices array.
 // Writes the three vertex indices of every face, in face order, so that
 // GetNumFaces() * 3 entries of Indices are filled.
 void ModelRenderer::BuildIndices(
 		const CModelDefPtr& mdef,
 		const VertexArrayIterator<u16>& Indices)
 {
 	size_t idxidx = 0;
 	SModelFace* faces = mdef->GetFaces();
 
 	for (size_t j = 0; j < mdef->GetNumFaces(); ++j)
 	{
 		SModelFace& face = faces[j];
 		Indices[idxidx++] = face.m_Verts[0];
 		Indices[idxidx++] = face.m_Verts[1];
 		Indices[idxidx++] = face.m_Verts[2];
 	}
 }
 
 
 
 ///////////////////////////////////////////////////////////////////////////////////////////////
 // ShaderModelRenderer implementation
 
 
 /**
  * Internal data of the ShaderModelRenderer.
  *
  * Separated into the source file to increase implementation hiding (and to
  * avoid some causes of recompiles).
  */
 struct ShaderModelRenderer::ShaderModelRendererInternals
 {
 	ShaderModelRendererInternals(ShaderModelRenderer* r) : m_Renderer(r) { }
 
 	/// Back-link to "our" renderer
 	ShaderModelRenderer* m_Renderer;
 
 	/// ModelVertexRenderer used for vertex transformations
 	ModelVertexRendererPtr vertexRenderer;
 
 	/// List of submitted models for rendering in this frame
 	/// (one list per cull group; cleared in EndFrame)
 	std::vector<CModel*> submissions[CSceneRenderer::CULL_MAX];
 };
 
 
 // Construction/Destruction
 // Allocates the internals and stores the vertex renderer that will be
 // used for all submitted models.
 ShaderModelRenderer::ShaderModelRenderer(ModelVertexRendererPtr vertexrenderer)
 {
 	m = new ShaderModelRendererInternals(this);
 	m->vertexRenderer = vertexrenderer;
 }
 
 // Release the internals allocated by the constructor.
 ShaderModelRenderer::~ShaderModelRenderer()
 {
 	delete m;
 }
 
 // Submit one model.
 // The model is appended to the submission list of the given cull group.
 // If it has no render data yet, or render data created for another vertex
 // renderer, new render data is created and the model is marked fully dirty.
 void ShaderModelRenderer::Submit(int cullGroup, CModel* model)
 {
 	CModelRData* rdata = (CModelRData*)model->GetRenderData();
 
 	// Ensure model data is valid
 	// (the address of our vertex renderer serves as the key)
 	const void* key = m->vertexRenderer.get();
 	if (!rdata || rdata->GetKey() != key)
 	{
 		rdata = m->vertexRenderer->CreateModelData(key, model);
 		model->SetRenderData(rdata);
 		model->SetDirty(~0u);
 	}
 
 	m->submissions[cullGroup].push_back(model);
 }
 
 
 // Call update for all submitted models and enter the rendering phase
 void ShaderModelRenderer::PrepareModels()
 {
 	for (int cullGroup = 0; cullGroup < CSceneRenderer::CULL_MAX; ++cullGroup)
 	{
-		for (size_t i = 0; i < m->submissions[cullGroup].size(); ++i)
+		for (CModel* model : m->submissions[cullGroup])
 		{
-			CModel* model = m->submissions[cullGroup][i];
-
 			model->ValidatePosition();
 
 			CModelRData* rdata = static_cast<CModelRData*>(model->GetRenderData());
 			ENSURE(rdata->GetKey() == m->vertexRenderer.get());
 
 			// Hand the accumulated update flags to the vertex renderer,
 			// then reset them.
 			m->vertexRenderer->UpdateModelData(model, rdata, rdata->m_UpdateFlags);
 			rdata->m_UpdateFlags = 0;
 		}
 	}
 }
 
 // Let the vertex renderer upload the data of every submitted model, in all
 // cull groups, using the given command context.
+void ShaderModelRenderer::UploadModels(
+	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
+{
+	for (int cullGroup = 0; cullGroup < CSceneRenderer::CULL_MAX; ++cullGroup)
+	{
+		for (CModel* model : m->submissions[cullGroup])
+		{
+			CModelRData* rdata = static_cast<CModelRData*>(model->GetRenderData());
+			ENSURE(rdata->GetKey() == m->vertexRenderer.get());
+
+			m->vertexRenderer->UploadModelData(deviceCommandContext, model, rdata);
+		}
+	}
+}
 
 // Clear the submissions list
 // (the lists of all cull groups are emptied)
 void ShaderModelRenderer::EndFrame()
 {
 	for (int cullGroup = 0; cullGroup < CSceneRenderer::CULL_MAX; ++cullGroup)
 		m->submissions[cullGroup].clear();
 }
 
 
 // Helper structs for ShaderModelRenderer::Render():
 
 // One model that has to be drawn sorted by distance.
 struct SMRSortByDistItem
 {
 	// Index of the model's technique in the sortByDistTechs list of Render()
 	size_t techIdx;
 	CModel* model;
 	// Z coordinate of the model's translation after the world-to-camera
 	// transform
 	float dist;
 };
 
 // Strict weak ordering of models for batching: compares the modeldef first,
 // then the material's diffuse texture, then its static uniforms.
 struct SMRBatchModel
 {
 	// const-qualified so the comparator can also be used through a const
 	// object or reference.
 	bool operator()(CModel* a, CModel* b) const
 	{
 		if (a->GetModelDef() < b->GetModelDef())
 			return true;
 		if (b->GetModelDef() < a->GetModelDef())
 			return false;
 
 		if (a->GetMaterial().GetDiffuseTexture() < b->GetMaterial().GetDiffuseTexture())
 			return true;
 		if (b->GetMaterial().GetDiffuseTexture() < a->GetMaterial().GetDiffuseTexture())
 			return false;
 
 		return a->GetMaterial().GetStaticUniforms() < b->GetMaterial().GetStaticUniforms();
 	}
 };
 
 // Orders distance-sorted items by decreasing distance.
 struct SMRCompareSortByDistItem
 {
 	// const-qualified so the comparator can also be used through a const
 	// object or reference.
 	bool operator()(const SMRSortByDistItem& a, const SMRSortByDistItem& b) const
 	{
 		// Prefer items with greater distance, so we draw back-to-front
 		return (a.dist > b.dist);
 
 		// (Distances will almost always be distinct, so we don't need to bother
 		// tie-breaking on modeldef/texture/etc)
 	}
 };
 
 // Hash map key identifying a material bucket: a shader effect name together
 // with a set of shader defines. Keys can be copied but not assigned.
 class SMRMaterialBucketKey
 {
 public:
 	SMRMaterialBucketKey(CStrIntern effect, const CShaderDefines& defines)
 		: effect(effect), defines(defines) { }
 
 	SMRMaterialBucketKey(const SMRMaterialBucketKey& entity) = default;
 
 	// Explicitly deleted (instead of declared private and left undefined),
 	// so that any accidental use is a clear compile-time error.
 	SMRMaterialBucketKey& operator=(const SMRMaterialBucketKey&) = delete;
 
 	CStrIntern effect;
 	CShaderDefines defines;
 
 	// Two keys are equal when both the effect and the defines are equal.
 	bool operator==(const SMRMaterialBucketKey& b) const
 	{
 		return (effect == b.effect && defines == b.defines);
 	}
 };
 
 // Hash functor for SMRMaterialBucketKey: combines the hash of the effect
 // name with the hash of the defines.
 struct SMRMaterialBucketKeyHash
 {
 	size_t operator()(const SMRMaterialBucketKey& key) const
 	{
 		size_t hash = 0;
 		hash_combine(hash, key.effect.GetHash());
 		hash_combine(hash, key.defines.GetHash());
 		return hash;
 	}
 };
 
 // A run of models that are rendered with the same shader technique.
 struct SMRTechBucket
 {
 	CShaderTechniquePtr tech;
 	// First model of the run; the list itself is stored elsewhere.
 	CModel** models;
 	// Number of models in the run.
 	size_t numModels;
 
 	// Model list is stored as pointers, not as a std::vector,
 	// so that sorting lists of this struct is fast
 };
 
 // Orders tech buckets by their technique pointer, so that buckets sharing
 // a technique end up next to each other.
 struct SMRCompareTechBucket
 {
 	// const-qualified so the comparator can also be used through a const
 	// object or reference.
 	bool operator()(const SMRTechBucket& a, const SMRTechBucket& b) const
 	{
 		return a.tech < b.tech;
 	}
 };
 
 // Render all models submitted for the given cull group.
 //  - modifier: gets BeginPass called once per technique pass and
 //    PrepareModel once per drawn model.
 //  - context: defines that are combined with each material's own defines
 //    when loading the shader effect.
 //  - flags: if non-zero, models whose flags share no bit with it are skipped.
 // Does nothing if the cull group has no submissions.
 void ShaderModelRenderer::Render(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const RenderModifierPtr& modifier, const CShaderDefines& context, int cullGroup, int flags)
 {
 	if (m->submissions[cullGroup].empty())
 		return;
 
 	CMatrix3D worldToCam;
 	g_Renderer.GetSceneRenderer().GetViewCamera().GetOrientation().GetInverse(worldToCam);
 
 	/*
 	 * Rendering approach:
 	 *
 	 * m->submissions contains the list of CModels to render.
 	 *
 	 * The data we need to render a model is:
 	 *  - CShaderTechnique
 	 *  - CTexture
 	 *  - CShaderUniforms
 	 *  - CModelDef (mesh data)
 	 *  - CModel (model instance data)
 	 *
 	 * For efficient rendering, we need to batch the draw calls to minimise state changes.
 	 * (Uniform and texture changes are assumed to be cheaper than binding new mesh data,
 	 * and shader changes are assumed to be most expensive.)
 	 * First, group all models that share a technique to render them together.
 	 * Within those groups, sub-group by CModelDef.
 	 * Within those sub-groups, sub-sub-group by CTexture.
 	 * Within those sub-sub-groups, sub-sub-sub-group by CShaderUniforms.
 	 *
 	 * Alpha-blended models have to be sorted by distance from camera,
 	 * then we can batch as long as the order is preserved.
 	 * Non-alpha-blended models can be arbitrarily reordered to maximise batching.
 	 *
 	 * For each model, the CShaderTechnique is derived from:
 	 *  - The current global 'context' defines
 	 *  - The CModel's material's defines
 	 *  - The CModel's material's shader effect name
 	 *
 	 * There are a smallish number of materials, and a smaller number of techniques.
 	 *
 	 * To minimise technique lookups, we first group models by material,
 	 * in 'materialBuckets' (a hash table).
 	 *
 	 * For each material bucket we then look up the appropriate shader technique.
 	 * If the technique requires sort-by-distance, the model is added to the
 	 * 'sortByDistItems' list with its computed distance.
 	 * Otherwise, the bucket's list of models is sorted by modeldef+texture+uniforms,
 	 * then the technique and model list is added to 'techBuckets'.
 	 *
 	 * 'techBuckets' is then sorted by technique, to improve batching when multiple
 	 * materials map onto the same technique.
 	 *
 	 * (Note that this isn't perfect batching: we don't sort across models in
 	 * multiple buckets that share a technique. In practice that shouldn't reduce
 	 * batching much (we rarely have one mesh used with multiple materials),
 	 * and it saves on copying and lets us sort smaller lists.)
 	 *
 	 * Extra tech buckets are added for the sorted-by-distance models without reordering.
 	 * Finally we render by looping over each tech bucket, then looping over the model
 	 * list in each, rebinding the GL state whenever it changes.
 	 */
 
 	// All temporary containers below allocate from this arena.
 	using Arena = Allocators::DynamicArena<256 * KiB>;
 
 	Arena arena;
 	using ModelListAllocator = ProxyAllocator<CModel*, Arena>;
 	using ModelList_t = std::vector<CModel*, ModelListAllocator>;
 	using MaterialBuckets_t = std::unordered_map<
 		SMRMaterialBucketKey,
 		ModelList_t,
 		SMRMaterialBucketKeyHash,
 		std::equal_to<SMRMaterialBucketKey>,
 		ProxyAllocator<
 			std::pair<const SMRMaterialBucketKey, ModelList_t>,
 			Arena> >;
 
 	MaterialBuckets_t materialBuckets((MaterialBuckets_t::allocator_type(arena)));
 
 	{
 		PROFILE3("bucketing by material");
 
 		for (size_t i = 0; i < m->submissions[cullGroup].size(); ++i)
 		{
 			CModel* model = m->submissions[cullGroup][i];
 			const CShaderDefines& defines = model->GetMaterial().GetShaderDefines();
 			SMRMaterialBucketKey key(model->GetMaterial().GetShaderEffect(), defines);
 
 			MaterialBuckets_t::iterator it = materialBuckets.find(key);
 			if (it == materialBuckets.end())
 			{
 				// New bucket: it always starts with one model, so no
 				// bucket is ever empty.
 				std::pair<MaterialBuckets_t::iterator, bool> inserted = materialBuckets.insert(
 					std::make_pair(key, ModelList_t(ModelList_t::allocator_type(arena))));
 				inserted.first->second.reserve(32);
 				inserted.first->second.push_back(model);
 			}
 			else
 			{
 				it->second.push_back(model);
 			}
 		}
 	}
 
 	using SortByDistItemsAllocator = ProxyAllocator<SMRSortByDistItem, Arena>;
 	std::vector<SMRSortByDistItem, SortByDistItemsAllocator> sortByDistItems((SortByDistItemsAllocator(arena)));
 
 	using SortByTechItemsAllocator = ProxyAllocator<CShaderTechniquePtr, Arena>;
 	std::vector<CShaderTechniquePtr, SortByTechItemsAllocator> sortByDistTechs((SortByTechItemsAllocator(arena)));
 		// indexed by sortByDistItems[i].techIdx
 		// (which stores indexes instead of CShaderTechniquePtr directly
 		// to avoid the shared_ptr copy cost when sorting; maybe it'd be better
 		// if we just stored raw CShaderTechnique* and assumed the shader manager
 		// will keep it alive long enough)
 
 	using TechBucketsAllocator =  ProxyAllocator<SMRTechBucket, Arena>;
 	std::vector<SMRTechBucket, TechBucketsAllocator> techBuckets((TechBucketsAllocator(arena)));
 
 	{
 		PROFILE3("processing material buckets");
 		for (MaterialBuckets_t::iterator it = materialBuckets.begin(); it != materialBuckets.end(); ++it)
 		{
 			// Material defines are applied on top of the context defines.
 			CShaderDefines defines = context;
 			defines.SetMany(it->first.defines);
 			CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(it->first.effect, defines);
 
 			// Skip invalid techniques (e.g. from data file errors)
 			if (!tech)
 				continue;
 
 			if (tech->GetSortByDistance())
 			{
 				// Add the tech into a vector so we can index it
 				// (There might be duplicates in this list, but that doesn't really matter)
 				if (sortByDistTechs.empty() || sortByDistTechs.back() != tech)
 					sortByDistTechs.push_back(tech);
 				size_t techIdx = sortByDistTechs.size() - 1;
 
 				// Add each model into sortByDistItems
 				for (size_t i = 0; i < it->second.size(); ++i)
 				{
 					SMRSortByDistItem itemWithDist;
 					itemWithDist.techIdx = techIdx;
 
 					CModel* model = it->second[i];
 					itemWithDist.model = model;
 
 					CVector3D modelpos = model->GetTransform().GetTranslation();
 					itemWithDist.dist = worldToCam.Transform(modelpos).Z;
 
 					sortByDistItems.push_back(itemWithDist);
 				}
 			}
 			else
 			{
 				// Sort model list by modeldef+texture, for batching
 				// TODO: This only sorts by base texture. While this is an OK approximation
 				// for most cases (as related samplers are usually used together), it would be better
 				// to take all the samplers into account when sorting here.
 				std::sort(it->second.begin(), it->second.end(), SMRBatchModel());
 
 				// Add a tech bucket pointing at this model list
 				SMRTechBucket techBucket = { tech, &it->second[0], it->second.size() };
 				techBuckets.push_back(techBucket);
 			}
 		}
 	}
 
 	{
 		PROFILE3("sorting tech buckets");
 		// Sort by technique, for better batching
 		std::sort(techBuckets.begin(), techBuckets.end(), SMRCompareTechBucket());
 	}
 
 	// List of models corresponding to sortByDistItems[i].model
 	// (This exists primarily because techBuckets wants a CModel**;
 	// we could avoid the cost of copying into this list by adding
 	// a stride length into techBuckets and not requiring contiguous CModel*s)
 	std::vector<CModel*, ModelListAllocator> sortByDistModels((ModelListAllocator(arena)));
 
 	if (!sortByDistItems.empty())
 	{
 		{
 			PROFILE3("sorting items by dist");
 			std::sort(sortByDistItems.begin(), sortByDistItems.end(), SMRCompareSortByDistItem());
 		}
 
 		{
 			PROFILE3("batching dist-sorted items");
 
 			// Reserving up front keeps the element addresses taken below
 			// stable, as the vector never has to reallocate.
 			sortByDistModels.reserve(sortByDistItems.size());
 
 			// Find runs of distance-sorted models that share a technique,
 			// and create a new tech bucket for each run
 
 			size_t start = 0; // start of current run
 			size_t currentTechIdx = sortByDistItems[start].techIdx;
 
 			for (size_t end = 0; end < sortByDistItems.size(); ++end)
 			{
 				sortByDistModels.push_back(sortByDistItems[end].model);
 
 				size_t techIdx = sortByDistItems[end].techIdx;
 				if (techIdx != currentTechIdx)
 				{
 					// Start of a new run - push the old run into a new tech bucket
 					SMRTechBucket techBucket = { sortByDistTechs[currentTechIdx], &sortByDistModels[start], end - start };
 					techBuckets.push_back(techBucket);
 					start = end;
 					currentTechIdx = techIdx;
 				}
 			}
 
 			// Add the tech bucket for the final run
 			SMRTechBucket techBucket = { sortByDistTechs[currentTechIdx], &sortByDistModels[start], sortByDistItems.size() - start };
 			techBuckets.push_back(techBucket);
 		}
 	}
 
 	const double time = g_Renderer.GetTimeManager().GetGlobalTime();
 
 	{
 		PROFILE3("rendering bucketed submissions");
 
 		size_t idxTechStart = 0;
 
 		// This vector keeps track of texture changes during rendering. It is kept outside the
 		// loops to avoid excessive reallocations. The token allocation of 64 elements
 		// should be plenty, though it is reallocated below (at a cost) if necessary.
 		using TextureListAllocator = ProxyAllocator<CTexture*, Arena>;
 		std::vector<CTexture*, TextureListAllocator> currentTexs((TextureListAllocator(arena)));
 		currentTexs.reserve(64);
 
 		// texBindings holds the identifier bindings in the shader, which can no longer be defined
 		// statically in the ShaderRenderModifier class. texBindingNames uses interned strings to
 		// keep track of when bindings need to be reevaluated.
 		using BindingListAllocator = ProxyAllocator<int32_t, Arena>;
 		std::vector<int32_t, BindingListAllocator> texBindings((BindingListAllocator(arena)));
 		texBindings.reserve(64);
 
 		using BindingNamesListAllocator = ProxyAllocator<CStrIntern, Arena>;
 		std::vector<CStrIntern, BindingNamesListAllocator> texBindingNames((BindingNamesListAllocator(arena)));
 		texBindingNames.reserve(64);
 
 		while (idxTechStart < techBuckets.size())
 		{
 			CShaderTechniquePtr currentTech = techBuckets[idxTechStart].tech;
 
 			// Find runs [idxTechStart, idxTechEnd) in techBuckets of the same technique
 			size_t idxTechEnd;
 			for (idxTechEnd = idxTechStart + 1; idxTechEnd < techBuckets.size(); ++idxTechEnd)
 			{
 				if (techBuckets[idxTechEnd].tech != currentTech)
 					break;
 			}
 
 			// For each of the technique's passes, render all the models in this run
 			for (int pass = 0; pass < currentTech->GetNumPasses(); ++pass)
 			{
 				deviceCommandContext->SetGraphicsPipelineState(
 					currentTech->GetGraphicsPipelineStateDesc(pass));
 				deviceCommandContext->BeginPass();
 
 				Renderer::Backend::IShaderProgram* shader = currentTech->GetShader(pass);
 
 				modifier->BeginPass(deviceCommandContext, shader);
 
 				// TODO: Use a more generic approach to handle bound queries.
 				// Each of these is bound at most once per pass.
 				bool boundTime = false;
 				bool boundWaterTexture = false;
 				bool boundSkyCube = false;
 
-				m->vertexRenderer->BeginPass();
-
 				// When the shader technique changes, textures need to be
 				// rebound, so ensure there are no remnants from the last pass.
 				// (the vector size is set to 0, but memory is not freed)
 				currentTexs.clear();
 				texBindings.clear();
 				texBindingNames.clear();
 
 				CModelDef* currentModeldef = NULL;
 				CShaderUniforms currentStaticUniforms;
 
 				for (size_t idx = idxTechStart; idx < idxTechEnd; ++idx)
 				{
 					CModel** models = techBuckets[idx].models;
 					size_t numModels = techBuckets[idx].numModels;
 					for (size_t i = 0; i < numModels; ++i)
 					{
 						CModel* model = models[i];
 
 						// Skip models that have none of the requested flags
 						// (a zero 'flags' disables the filter).
 						if (flags && !(model->GetFlags() & flags))
 							continue;
 
 						const CMaterial::SamplersVector& samplers = model->GetMaterial().GetSamplers();
 						size_t samplersNum = samplers.size();
 
 						// make sure the vectors are the right virtual sizes, and also
 						// reallocate if there are more samplers than expected.
 						if (currentTexs.size() != samplersNum)
 						{
 							currentTexs.resize(samplersNum, NULL);
 							texBindings.resize(samplersNum, -1);
 							texBindingNames.resize(samplersNum, CStrIntern());
 
 							// ensure they are definitely empty
 							std::fill(texBindings.begin(), texBindings.end(), -1);
 							std::fill(currentTexs.begin(), currentTexs.end(), nullptr);
 							std::fill(texBindingNames.begin(), texBindingNames.end(), CStrIntern());
 						}
 
 						// bind the samplers to the shader
 						for (size_t s = 0; s < samplersNum; ++s)
 						{
 							const CMaterial::TextureSampler& samp = samplers[s];
 
 							// check that the handles are current
 							// and reevaluate them if necessary
 							if (texBindingNames[s] != samp.Name || texBindings[s] < 0)
 							{
 								texBindings[s] = shader->GetBindingSlot(samp.Name);
 								texBindingNames[s] = samp.Name;
 							}
 
 							// same with the actual sampler bindings
 							CTexture* newTex = samp.Sampler.get();
 							if (texBindings[s] >= 0 && newTex != currentTexs[s])
 							{
 								newTex->UploadBackendTextureIfNeeded(deviceCommandContext);
 								deviceCommandContext->SetTexture(
 									texBindings[s], newTex->GetBackendTexture());
 								currentTexs[s] = newTex;
 							}
 						}
 
 						// Bind modeldef when it changes
 						CModelDef* newModeldef = model->GetModelDef().get();
 						if (newModeldef != currentModeldef)
 						{
 							currentModeldef = newModeldef;
 							m->vertexRenderer->PrepareModelDef(deviceCommandContext, *currentModeldef);
 						}
 
 						// Bind all uniforms when any change
 						CShaderUniforms newStaticUniforms = model->GetMaterial().GetStaticUniforms();
 						if (newStaticUniforms != currentStaticUniforms)
 						{
 							currentStaticUniforms = newStaticUniforms;
 							currentStaticUniforms.BindUniforms(deviceCommandContext, shader);
 						}
 
 						const CShaderRenderQueries& renderQueries = model->GetMaterial().GetRenderQueries();
 
 						for (size_t q = 0; q < renderQueries.GetSize(); ++q)
 						{
 							CShaderRenderQueries::RenderQuery rq = renderQueries.GetItem(q);
 							if (rq.first == RQUERY_TIME)
 							{
 								if (!boundTime)
 								{
 									deviceCommandContext->SetUniform(
 										shader->GetBindingSlot(rq.second), time);
 									boundTime = true;
 								}
 							}
 							else if (rq.first == RQUERY_WATER_TEX)
 							{
 								if (!boundWaterTexture)
 								{
 									const double period = 1.6;
 									const WaterManager& waterManager = g_Renderer.GetSceneRenderer().GetWaterManager();
 									if (waterManager.m_RenderWater && waterManager.WillRenderFancyWater())
 									{
 										const CTexturePtr& waterTexture = waterManager.m_NormalMap[waterManager.GetCurrentTextureIndex(period)];
 										waterTexture->UploadBackendTextureIfNeeded(deviceCommandContext);
 										deviceCommandContext->SetTexture(
 											shader->GetBindingSlot(str_waterTex),
 											waterTexture->GetBackendTexture());
 									}
 									else
 									{
 										// No fancy water: bind the error texture instead.
 										deviceCommandContext->SetTexture(
 											shader->GetBindingSlot(str_waterTex),
 											g_Renderer.GetTextureManager().GetErrorTexture()->GetBackendTexture());
 									}
 									boundWaterTexture = true;
 								}
 							}
 							else if (rq.first == RQUERY_SKY_CUBE)
 							{
 								if (!boundSkyCube)
 								{
 									deviceCommandContext->SetTexture(
 										shader->GetBindingSlot(str_skyCube),
 										g_Renderer.GetSceneRenderer().GetSkyManager().GetSkyCube());
 									boundSkyCube = true;
 								}
 							}
 						}
 
 						modifier->PrepareModel(deviceCommandContext, model);
 
 						CModelRData* rdata = static_cast<CModelRData*>(model->GetRenderData());
 						ENSURE(rdata->GetKey() == m->vertexRenderer.get());
 
 						m->vertexRenderer->RenderModel(deviceCommandContext, shader, model, rdata);
 					}
 				}
 
-				m->vertexRenderer->EndPass(deviceCommandContext);
-
 				deviceCommandContext->EndPass();
 			}
 
 			idxTechStart = idxTechEnd;
 		}
 	}
 }
Index: ps/trunk/source/renderer/ModelRenderer.h
===================================================================
--- ps/trunk/source/renderer/ModelRenderer.h	(revision 27181)
+++ ps/trunk/source/renderer/ModelRenderer.h	(revision 27182)
@@ -1,279 +1,287 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
  * Home to the ModelRenderer class, an abstract base class that manages
  * a per-frame list of submitted models, as well as simple helper
  * classes.
  */
 
 #ifndef INCLUDED_MODELRENDERER
 #define INCLUDED_MODELRENDERER
 
 #include <memory>
 
 #include "graphics/MeshManager.h"
 #include "graphics/RenderableObject.h"
 #include "graphics/SColor.h"
 #include "renderer/backend/IDeviceCommandContext.h"
 #include "renderer/VertexArray.h"
 
 class RenderModifier;
 typedef std::shared_ptr<RenderModifier> RenderModifierPtr;
 
 class LitRenderModifier;
 typedef std::shared_ptr<LitRenderModifier> LitRenderModifierPtr;
 
 class ModelVertexRenderer;
 typedef std::shared_ptr<ModelVertexRenderer> ModelVertexRendererPtr;
 
 class ModelRenderer;
 typedef std::shared_ptr<ModelRenderer> ModelRendererPtr;
 
 class CModel;
 class CShaderDefines;
 
 /**
  * Class CModelRData: Render data that is maintained per CModel.
  * ModelRenderer implementations may derive from this class to store
  * per-CModel data.
  *
  * The main purpose of this class over CRenderData is to track which
  * ModelRenderer the render data belongs to (via the key that is passed
  * to the constructor). When a model changes the renderer it uses
  * (e.g. via run-time modification of the renderpath configuration),
  * the old ModelRenderer's render data is supposed to be replaced by
  * the new data.
  */
 class CModelRData : public CRenderData
 {
 public:
 	CModelRData(const void* key) : m_Key(key) { }
 
 	/**
 	 * GetKey: Retrieve the key that can be used to identify the
 	 * ModelRenderer that created this data.
 	 *
 	 * @return The opaque key that was passed to the constructor.
 	 */
 	const void* GetKey() const { return m_Key; }
 
 private:
 	/// The key for model renderer identification
 	const void* m_Key;
 };
 
 
 /**
- * Class ModelRenderer: Abstract base class for all model renders.
+ * Class ModelRenderer: Abstract base class for all model renderers.
  *
  * A ModelRenderer manages a per-frame list of models.
  *
  * It is supposed to be derived in order to create new ways in which
  * the per-frame list of models can be managed (for batching, for
  * transparent rendering, etc.) or potentially for rarely used special
  * effects.
  *
  * A typical ModelRenderer will delegate vertex transformation/setup
  * to a ModelVertexRenderer.
  * It will delegate fragment stage setup to a RenderModifier.
  *
  * For most purposes, you should use a BatchModelRenderer with
  * specialized ModelVertexRenderer and RenderModifier implementations.
  *
  * It is suggested that a derived class implement the provided generic
  * Render function, however in some cases it may be necessary to supply
  * a Render function with a different prototype.
  *
  * ModelRenderer also contains a number of static helper functions
  * for building vertex arrays.
  */
 class ModelRenderer
 {
 public:
 	ModelRenderer() { }
 	virtual ~ModelRenderer() { }
 
 	/**
 	 * Initialise global settings.
 	 * Should be called before using the class.
 	 */
 	static void Init();
 
 	/**
 	 * Submit: Submit a model for rendering this frame.
 	 *
 	 * preconditions : The model must not have been submitted to any
 	 * ModelRenderer in this frame. Submit may only be called
 	 * after EndFrame and before PrepareModels.
 	 *
 	 * @param model The model that will be added to the list of models
 	 * submitted this frame.
 	 */
 	virtual void Submit(int cullGroup, CModel* model) = 0;
 
 	/**
 	 * PrepareModels: Calculate renderer data for all previously
 	 * submitted models.
 	 *
 	 * Must be called before any rendering calls and after all models
 	 * for this frame have been submitted.
 	 */
 	virtual void PrepareModels() = 0;
 
 	/**
+	 * Upload renderer data for all previously submitted models to the backend.
+	 *
+	 * Must be called before any rendering calls and after all models
+	 * for this frame have been prepared.
+	 */
+	virtual void UploadModels(
+		Renderer::Backend::IDeviceCommandContext* deviceCommandContext) = 0;
+
+	/**
 	 * EndFrame: Remove all models from the list of submitted
 	 * models.
 	 */
 	virtual void EndFrame() = 0;
 
 	/**
 	 * Render: Render submitted models, using the given RenderModifier to setup
 	 * the fragment stage.
 	 *
 	 * @note It is suggested that derived model renderers implement and use
-	 * this Render functions. However, a highly specialized model renderer
+	 * this Render function. However, a highly specialized model renderer
 	 * may need to "disable" this function and provide its own Render function
 	 * with a different prototype.
 	 *
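-	 * preconditions  : PrepareModels must be called after all models have been
-	 * submitted and before calling Render.
+	 * preconditions  : PrepareModels and then UploadModels must be called after
+	 * all models have been submitted and before calling Render.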
 	 *
 	 * @param modifier The RenderModifier that specifies the fragment stage.
 	 * @param flags If flags is 0, all submitted models are rendered.
 	 * If flags is non-zero, only models that contain flags in their
 	 * CModel::GetFlags() are rendered.
 	 */
 	virtual void Render(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const RenderModifierPtr& modifier, const CShaderDefines& context, int cullGroup, int flags) = 0;
 
 	/**
 	 * CopyPositionAndNormals: Copy unanimated object-space vertices and
 	 * normals into the given vertex array.
 	 *
 	 * @param mdef The underlying CModelDef that contains mesh data.
 	 * @param Position Points to the array that will receive
 	 * position vectors. The array behind the iterator
 	 * must be large enough to hold model->GetModelDef()->GetNumVertices()
 	 * vertices.
 	 * @param Normal Points to the array that will receive normal vectors.
 	 * The array behind the iterator must be as large as the Position array.
 	 */
 	static void CopyPositionAndNormals(
 			const CModelDefPtr& mdef,
 			const VertexArrayIterator<CVector3D>& Position,
 			const VertexArrayIterator<CVector3D>& Normal);
 
 	/**
 	 * BuildPositionAndNormals: Build animated vertices and normals,
 	 * transformed into world space.
 	 *
 	 * @param model The model that is to be transformed.
 	 * @param Position Points to the array that will receive
 	 * transformed position vectors. The array behind the iterator
 	 * must be large enough to hold model->GetModelDef()->GetNumVertices()
 	 * vertices. It must allow 16 bytes to be written to each element
 	 * (i.e. provide 4 bytes of padding after each CVector3D).
 	 * @param Normal Points to the array that will receive transformed
 	 * normal vectors. The array behind the iterator must be as large as
 	 * the Position array.
 	 */
 	static void BuildPositionAndNormals(
 			CModel* model,
 			const VertexArrayIterator<CVector3D>& Position,
 			const VertexArrayIterator<CVector3D>& Normal);
 
 	/**
 	 * BuildColor4ub: Build lighting colors for the given model,
 	 * based on previously calculated world space normals.
 	 *
 	 * @param model The model that is to be lit.
 	 * @param Normal Array of the model's normal vectors, animated and
 	 * transformed into world space.
 	 * @param Color Points to the array that will receive the lit vertex color.
-	 * The array behind the iterator must large enough to hold
+	 * The array behind the iterator must be large enough to hold
 	 * model->GetModelDef()->GetNumVertices() vertices.
 	 */
 	static void BuildColor4ub(
 			CModel* model,
 			const VertexArrayIterator<CVector3D>& Normal,
 			const VertexArrayIterator<SColor4ub>& Color);
 
 	/**
 	 * BuildUV: Copy UV coordinates into the given vertex array.
 	 *
 	 * @param mdef The model def.
 	 * @param UV Points to the array that will receive UV coordinates.
-	 * The array behind the iterator must large enough to hold
+	 * The array behind the iterator must be large enough to hold
 	 * mdef->GetNumVertices() vertices.
 	 */
 	static void BuildUV(
 			const CModelDefPtr& mdef,
 			const VertexArrayIterator<float[2]>& UV,
 			int UVset);
 
 	/**
 	 * BuildIndices: Create the indices array for the given CModelDef.
 	 *
 	 * @param mdef The model definition object.
 	 * @param Indices The index array, must be able to hold
 	 * mdef->GetNumFaces()*3 elements.
 	 */
 	static void BuildIndices(
 			const CModelDefPtr& mdef,
 			const VertexArrayIterator<u16>& Indices);
 
 	/**
 	 * GenTangents: Generate tangents for the given CModelDef.
 	 *
 	 * @param mdef The model definition object.
 	 * @param newVertices An out vector of the unindexed vertices with tangents added.
 	 * The new vertices cannot be used with existing face index and must be welded/reindexed.
 	 */
 	static void GenTangents(const CModelDefPtr& mdef, std::vector<float>& newVertices, bool gpuSkinning);
 };
 
 /**
  * Implementation of ModelRenderer that loads the appropriate shaders for
- * rendering each model, and that batches by shader (and by mesh and texture).
- *
- * Note that the term "Shader" is somewhat misleading, as this handled
- * fixed-function rendering using the same API as real GLSL/ARB shaders.
+ * rendering each model, and that batches by shader technique (and by mesh and texture).
  */
 class ShaderModelRenderer : public ModelRenderer
 {
 	friend struct ShaderModelRendererInternals;
 
 public:
 	ShaderModelRenderer(ModelVertexRendererPtr vertexrender);
-	virtual ~ShaderModelRenderer();
+	~ShaderModelRenderer() override;
 
 	// Batching implementations
-	virtual void Submit(int cullGroup, CModel* model);
-	virtual void PrepareModels();
-	virtual void EndFrame();
-	virtual void Render(
+	void Submit(int cullGroup, CModel* model) override;
+	void PrepareModels() override;
+	void UploadModels(
+		Renderer::Backend::IDeviceCommandContext* deviceCommandContext) override;
+	void EndFrame() override;
+	void Render(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
-		const RenderModifierPtr& modifier, const CShaderDefines& context, int cullGroup, int flags);
+		const RenderModifierPtr& modifier, const CShaderDefines& context, int cullGroup, int flags) override;
 
 private:
 	struct ShaderModelRendererInternals;
 	ShaderModelRendererInternals* m;
 };
 
 #endif // INCLUDED_MODELRENDERER
Index: ps/trunk/source/renderer/ModelVertexRenderer.h
===================================================================
--- ps/trunk/source/renderer/ModelVertexRenderer.h	(revision 27181)
+++ ps/trunk/source/renderer/ModelVertexRenderer.h	(revision 27182)
@@ -1,154 +1,140 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
  * Definition of ModelVertexRenderer, the abstract base class for model
  * vertex transformation implementations.
  */
 
 #ifndef INCLUDED_MODELVERTEXRENDERER
 #define INCLUDED_MODELVERTEXRENDERER
 
 #include "graphics/MeshManager.h"
 #include "graphics/ShaderProgramPtr.h"
 #include "renderer/backend/IDeviceCommandContext.h"
 #include "renderer/backend/IShaderProgram.h"
 
 class CModel;
 class CModelRData;
 
 /**
  * Class ModelVertexRenderer: Normal ModelRenderer implementations delegate
  * vertex array management and vertex transformation to an implementation of
  * ModelVertexRenderer.
  *
  * ModelVertexRenderer implementations should be designed so that one
  * instance of the implementation can be used with more than one ModelRenderer
  * simultaneously.
  */
 class ModelVertexRenderer
 {
 public:
 	virtual ~ModelVertexRenderer() { }
 
 
 	/**
 	 * CreateModelData: Create internal data for one model.
 	 *
 	 * ModelRenderer implementations must call this once for every
 	 * model that will later be rendered, with @p key set to a value
 	 * that's unique to that ModelRenderer.
 	 *
 	 * ModelVertexRenderer implementations should use this function to
 	 * create per-CModel and per-CModelDef data like vertex arrays.
 	 *
 	 * @param key An opaque pointer to pass to the CModelRData constructor
 	 * @param model The model.
 	 *
 	 * @return A new CModelRData that will be passed into other
 	 * ModelVertexRenderer functions whenever the same CModel is used again.
 	 */
 	virtual CModelRData* CreateModelData(const void* key, CModel* model) = 0;
 
 
 	/**
 	 * UpdateModelData: Calculate per-model data for each frame.
 	 *
 	 * ModelRenderer implementations must call this once per frame for
 	 * every model that is to be rendered in this frame, even if the
 	 * value of updateflags will be zero.
 	 * This implies that this function will also be called at least once
 	 * between a call to CreateModelData and a call to RenderModel.
 	 *
 	 * ModelVertexRenderer implementations should use this function to
 	 * perform software vertex transforms and potentially other per-frame
 	 * calculations.
 	 *
 	 * @param model The model.
 	 * @param data Private data as returned by CreateModelData.
 	 * @param updateflags Flags indicating which data has changed during
 	 * the frame. The value is the same as the value of the model's
 	 * CRenderData::m_UpdateFlags.
 	 */
 	virtual void UpdateModelData(CModel* model, CModelRData* data, int updateflags) = 0;
 
-
 	/**
-	 * BeginPass: Setup backend state for this ModelVertexRenderer.
-	 *
-	 * ModelVertexRenderer implementations should prepare "heavy"
-	 * state such as vertex shader state to prepare for rendering models
-	 * and delivering vertex data to the fragment stage as described by
-	 * shader.
-	 *
-	 * ModelRenderer implementations must call this function before any
-	 * calls to other rendering related functions.
+	 * Upload per-model data to the backend.
 	 *
-	 * Recursive calls to BeginPass are not allowed, and every BeginPass
-	 * is matched by a corresponding call to EndPass.
-	 */
-	virtual void BeginPass() = 0;
-
-
-	/**
-	 * EndPass: Cleanup OpenGL state set up by BeginPass.
+	 * ModelRenderer implementations must call this after UpdateModelData once
+	 * per frame for every model that is to be rendered in this frame.
 	 *
-	 * ModelRenderer implementations must call this function after
-	 * rendering related functions for one pass have been called.
+	 * ModelVertexRenderer implementations should use this function to
+	 * upload all needed data to the backend.
 	 */
-	virtual void EndPass(Renderer::Backend::IDeviceCommandContext* deviceCommandContext) = 0;
-
+	virtual void UploadModelData(
+		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+		CModel* model, CModelRData* data) = 0;
 
 	/**
 	 * PrepareModelDef: Setup backend state for rendering of models that
 	 * use the given CModelDef object as base.
 	 *
 	 * ModelRenderer implementations must call this function before
 	 * rendering a sequence of models based on the given CModelDef.
 	 * When a ModelRenderer switches back and forth between CModelDefs,
 	 * it must call PrepareModelDef for every switch.
 	 *
 	 * @param def The model definition.
 	 */
 	virtual void PrepareModelDef(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const CModelDef& def) = 0;
 
 
 	/**
 	 * RenderModel: Invoke the rendering commands for the given model.
 	 *
 	 * ModelRenderer implementations must call this function to perform
 	 * the actual rendering.
 	 *
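-	 * preconditions  : The most recent call to PrepareModelDef since
-	 * BeginPass has been for model->GetModelDef().
+	 * preconditions  : The most recent call to PrepareModelDef in the
+	 * current pass has been for model->GetModelDef().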
 	 *
 	 * @param model The model that should be rendered.
 	 * @param data Private data for the model as returned by CreateModelData.
 	 *
 	 * postconditions : Subsequent calls to RenderModel for models
 	 * that use the same CModelDef object and the same texture must
 	 * succeed.
 	 */
 	virtual void RenderModel(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		Renderer::Backend::IShaderProgram* shader, CModel* model, CModelRData* data) = 0;
 };
 
 
 #endif // INCLUDED_MODELVERTEXRENDERER
Index: ps/trunk/source/renderer/OverlayRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/OverlayRenderer.cpp	(revision 27181)
+++ ps/trunk/source/renderer/OverlayRenderer.cpp	(revision 27182)
@@ -1,818 +1,822 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "OverlayRenderer.h"
 
 #include "graphics/Camera.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/Overlay.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "lib/hash.h"
 #include "maths/MathUtil.h"
 #include "maths/Quaternion.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/TexturedLineRData.h"
 #include "renderer/VertexArray.h"
 #include "renderer/VertexBuffer.h"
 #include "renderer/VertexBufferManager.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/system/SimContext.h"
 
 #include <unordered_map>
 
 namespace
 {
 
 CShaderTechniquePtr GetOverlayLineShaderTechnique(const CShaderDefines& defines)
 {
 	return g_Renderer.GetShaderManager().LoadEffect(str_overlay_line, defines);
 }
 
 } // anonymous namespace
 
 /**
  * Key used to group quads into batches for more efficient rendering. Currently groups by the combination
  * of the main texture and the texture mask, to minimize texture swapping during rendering.
  */
 struct QuadBatchKey
 {
 	QuadBatchKey (const CTexturePtr& texture, const CTexturePtr& textureMask)
 		: m_Texture(texture), m_TextureMask(textureMask)
 	{ }
 
 	bool operator==(const QuadBatchKey& other) const
 	{
 		return (m_Texture == other.m_Texture && m_TextureMask == other.m_TextureMask);
 	}
 
 	CTexturePtr m_Texture;
 	CTexturePtr m_TextureMask;
 };
 
 struct QuadBatchHash
 {
 	std::size_t operator()(const QuadBatchKey& d) const
 	{
 		size_t seed = 0;
 		hash_combine(seed, d.m_Texture);
 		hash_combine(seed, d.m_TextureMask);
 		return seed;
 	}
 };
 
 /**
  * Holds information about a single quad rendering batch.
  */
 class QuadBatchData : public CRenderData
 {
 public:
 	QuadBatchData() : m_IndicesBase(0), m_NumRenderQuads(0) { }
 
 	/// Holds the quad overlay structures requested to be rendered in this batch. Must be cleared
 	/// after each frame.
 	std::vector<SOverlayQuad*> m_Quads;
 
 	/// Start index of this batch into the dedicated quad indices VertexArray (see OverlayInternals).
 	size_t m_IndicesBase;
 	/// Amount of quads to actually render in this batch. Potentially (although unlikely to be)
 	/// different from m_Quads.size() due to restrictions on the total amount of quads that can be
 	/// rendered. Must be reset after each frame.
 	size_t m_NumRenderQuads;
 };
 
 struct OverlayRendererInternals
 {
 	using QuadBatchMap = std::unordered_map<QuadBatchKey, QuadBatchData, QuadBatchHash>;
 
 	OverlayRendererInternals();
 	~OverlayRendererInternals(){ }
 
 	std::vector<SOverlayLine*> lines;
 	std::vector<SOverlayTexturedLine*> texlines;
 	std::vector<SOverlaySprite*> sprites;
 	std::vector<SOverlayQuad*> quads;
 	std::vector<SOverlaySphere*> spheres;
 
 	QuadBatchMap quadBatchMap;
 
 	// Dedicated vertex/index buffers for rendering all quads (to within the limits set by
 	// MAX_QUAD_OVERLAYS).
 	VertexArray quadVertices;
 	VertexArray::Attribute quadAttributePos;
 	VertexArray::Attribute quadAttributeColor;
 	VertexArray::Attribute quadAttributeUV;
 	VertexIndexArray quadIndices;
 
 	// Maximum amount of quad overlays we support for rendering. This limit is set to be able to
 	// render all quads from a single dedicated VB without having to reallocate it, which is much
 	// faster in the typical case of rendering only a handful of quads. When modifying this value,
 	// you must take care for the new amount of quads to fit in a single backend buffer (which is
 	// not likely to be a problem).
 	static const size_t MAX_QUAD_OVERLAYS = 1024;
 
 	// Sets of commonly-(re)used shader defines.
 	CShaderDefines defsOverlayLineNormal;
 	CShaderDefines defsOverlayLineAlwaysVisible;
 	CShaderDefines defsQuadOverlay;
 
 	// Geometry for a unit sphere
 	std::vector<float> sphereVertexes;
 	std::vector<u16> sphereIndexes;
 	void GenerateSphere();
 
 	// Performs one-time setup. Called from CRenderer::Open, after graphics capabilities have
-	// been detected. Note that no backend buffer must be created before this is called, since
+	// been detected. Note that no backend buffer may be created before this is called, since
 	// the shader path and graphics capabilities are not guaranteed to be stable before this
 	// point.
 	void Initialize();
 };
 
 const float OverlayRenderer::OVERLAY_VOFFSET = 0.2f;
 
 OverlayRendererInternals::OverlayRendererInternals()
 	: quadVertices(Renderer::Backend::IBuffer::Type::VERTEX, true),
 	quadIndices(false)
 {
 	quadAttributePos.format = Renderer::Backend::Format::R32G32B32_SFLOAT;
 	quadVertices.AddAttribute(&quadAttributePos);
 
 	quadAttributeColor.format = Renderer::Backend::Format::R8G8B8A8_UNORM;
 	quadVertices.AddAttribute(&quadAttributeColor);
 
 	quadAttributeUV.format = Renderer::Backend::Format::R16G16_SINT;
 	quadVertices.AddAttribute(&quadAttributeUV);
 
 	// Note that we're reusing the textured overlay line shader for the quad overlay rendering. This
 	// is because their code is almost identical; the only difference is that for the quad overlays
 	// we want to use a vertex color stream as opposed to an objectColor uniform. To this end, the
 	// shader has been set up to switch between the two behaviours based on the USE_OBJECTCOLOR define.
 	defsOverlayLineNormal.Add(str_USE_OBJECTCOLOR, str_1);
 	defsOverlayLineAlwaysVisible.Add(str_USE_OBJECTCOLOR, str_1);
 	defsOverlayLineAlwaysVisible.Add(str_IGNORE_LOS, str_1);
 }
 
 void OverlayRendererInternals::Initialize()
 {
 	// Perform any initialization after graphics capabilities have been detected. Notably,
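-	// only at this point can we safely allocate backend buffer (in contrast to e.g. in the constructor),
+	// only at this point can we safely allocate backend buffers (in contrast to e.g. in the constructor),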
 	// because their creation depends on the shader path, which is not reliably set before this point.
 
 	quadVertices.SetNumberOfVertices(MAX_QUAD_OVERLAYS * 4);
 	quadVertices.Layout(); // allocate backing store
 
 	quadIndices.SetNumberOfVertices(MAX_QUAD_OVERLAYS * 6);
 	quadIndices.Layout(); // allocate backing store
 
 	// Since the quads in the vertex array are independent and always consist of exactly 4 vertices per quad, the
 	// indices are always the same; we can therefore fill in all the indices once and pretty much forget about
 	// them. We then also no longer need its backing store, since we never change any indices afterwards.
 	VertexArrayIterator<u16> index = quadIndices.GetIterator();
 	for (u16 i = 0; i < static_cast<u16>(MAX_QUAD_OVERLAYS); ++i)
 	{
 		*index++ = i * 4 + 0;
 		*index++ = i * 4 + 1;
 		*index++ = i * 4 + 2;
 		*index++ = i * 4 + 2;
 		*index++ = i * 4 + 3;
 		*index++ = i * 4 + 0;
 	}
 	quadIndices.Upload();
 	quadIndices.FreeBackingStore();
 }
 
 OverlayRenderer::OverlayRenderer()
 {
 	m = new OverlayRendererInternals();
 }
 
 OverlayRenderer::~OverlayRenderer()
 {
 	delete m;
 }
 
 void OverlayRenderer::Initialize()
 {
 	m->Initialize();
 }
 
 void OverlayRenderer::Submit(SOverlayLine* line)
 {
 	m->lines.push_back(line);
 }
 
 void OverlayRenderer::Submit(SOverlayTexturedLine* line)
 {
 	// Simplify the rest of the code by guaranteeing non-empty lines
 	if (line->m_Coords.empty())
 		return;
 
 	m->texlines.push_back(line);
 }
 
 void OverlayRenderer::Submit(SOverlaySprite* overlay)
 {
 	m->sprites.push_back(overlay);
 }
 
 void OverlayRenderer::Submit(SOverlayQuad* overlay)
 {
 	m->quads.push_back(overlay);
 }
 
 void OverlayRenderer::Submit(SOverlaySphere* overlay)
 {
 	m->spheres.push_back(overlay);
 }
 
 void OverlayRenderer::EndFrame()
 {
 	m->lines.clear();
 	m->texlines.clear();
 	m->sprites.clear();
 	m->quads.clear();
 	m->spheres.clear();
 
 	// this should leave the capacity unchanged, which is okay since it
 	// won't be very large or very variable
 
 	// Empty the batch rendering data structures, but keep their key mappings around for the next frames
 	for (OverlayRendererInternals::QuadBatchMap::iterator it = m->quadBatchMap.begin(); it != m->quadBatchMap.end(); ++it)
 	{
 		QuadBatchData& quadBatchData = (it->second);
 		quadBatchData.m_Quads.clear();
 		quadBatchData.m_NumRenderQuads = 0;
 		quadBatchData.m_IndicesBase = 0;
 	}
 }
 
 void OverlayRenderer::PrepareForRendering()
 {
 	PROFILE3("prepare overlays");
 
 	// This is where we should do something like sort the overlays by
 	// color/sprite/etc for more efficient rendering
 
 	for (size_t i = 0; i < m->texlines.size(); ++i)
 	{
 		SOverlayTexturedLine* line = m->texlines[i];
 		if (!line->m_RenderData)
 		{
 			line->m_RenderData = std::make_shared<CTexturedLineRData>();
 			line->m_RenderData->Update(*line);
 			// We assume the overlay line will get replaced by the caller
 			// if terrain changes, so we don't need to detect that here and
 			// call Update again. Also we assume the caller won't change
 			// any of the parameters after first submitting the line.
 		}
 	}
 
 	// Group quad overlays by their texture/mask combination for efficient rendering
 	// TODO: consider doing this directly in Submit()
 	for (size_t i = 0; i < m->quads.size(); ++i)
 	{
 		SOverlayQuad* const quad = m->quads[i];
 
 		QuadBatchKey textures(quad->m_Texture, quad->m_TextureMask);
 		QuadBatchData& batchRenderData = m->quadBatchMap[textures]; // will create entry if it doesn't already exist
 
 		// add overlay to list of quads
 		batchRenderData.m_Quads.push_back(quad);
 	}
 
 	const CVector3D vOffset(0, OverlayRenderer::OVERLAY_VOFFSET, 0);
 
 	// Write quad overlay vertices/indices to VA backing store
 	VertexArrayIterator<CVector3D> vertexPos = m->quadAttributePos.GetIterator<CVector3D>();
 	VertexArrayIterator<SColor4ub> vertexColor = m->quadAttributeColor.GetIterator<SColor4ub>();
 	VertexArrayIterator<short[2]> vertexUV = m->quadAttributeUV.GetIterator<short[2]>();
 
 	size_t indicesIdx = 0;
 	size_t totalNumQuads = 0;
 
 	for (OverlayRendererInternals::QuadBatchMap::iterator it = m->quadBatchMap.begin(); it != m->quadBatchMap.end(); ++it)
 	{
 		QuadBatchData& batchRenderData = (it->second);
 		batchRenderData.m_NumRenderQuads = 0;
 
 		if (batchRenderData.m_Quads.empty())
 			continue;
 
 		// Remember the current index into the (entire) indices array as our base offset for this batch
 		batchRenderData.m_IndicesBase = indicesIdx;
 
 		// points to the index where each iteration's vertices will be appended
 		for (size_t i = 0; i < batchRenderData.m_Quads.size() && totalNumQuads < OverlayRendererInternals::MAX_QUAD_OVERLAYS; i++)
 		{
 			const SOverlayQuad* quad = batchRenderData.m_Quads[i];
 
 			const SColor4ub quadColor = quad->m_Color.AsSColor4ub();
 
 			*vertexPos++ = quad->m_Corners[0] + vOffset;
 			*vertexPos++ = quad->m_Corners[1] + vOffset;
 			*vertexPos++ = quad->m_Corners[2] + vOffset;
 			*vertexPos++ = quad->m_Corners[3] + vOffset;
 
 			(*vertexUV)[0] = 0;
 			(*vertexUV)[1] = 0;
 			++vertexUV;
 			(*vertexUV)[0] = 0;
 			(*vertexUV)[1] = 1;
 			++vertexUV;
 			(*vertexUV)[0] = 1;
 			(*vertexUV)[1] = 1;
 			++vertexUV;
 			(*vertexUV)[0] = 1;
 			(*vertexUV)[1] = 0;
 			++vertexUV;
 
 			*vertexColor++ = quadColor;
 			*vertexColor++ = quadColor;
 			*vertexColor++ = quadColor;
 			*vertexColor++ = quadColor;
 
 			indicesIdx += 6;
 
 			totalNumQuads++;
 			batchRenderData.m_NumRenderQuads++;
 		}
 	}
 
 	m->quadVertices.Upload();
 	// don't free the backing store! we'll overwrite it on the next frame to save a reallocation.
 
 	m->quadVertices.PrepareForRendering();
 }
 
+void OverlayRenderer::Upload(
+	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
+{
+	m->quadVertices.UploadIfNeeded(deviceCommandContext); // quad vertex array written in PrepareForRendering
+	m->quadIndices.UploadIfNeeded(deviceCommandContext); // quad index array filled once in OverlayRendererInternals::Initialize
+}
+
 void OverlayRenderer::RenderOverlaysBeforeWater(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	PROFILE3_GPU("overlays (before)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render overlays before water");
 
 	for (SOverlayLine* line : m->lines)
 	{
 		if (line->m_Coords.empty())
 			continue;
 
 		g_Renderer.GetDebugRenderer().DrawLine(line->m_Coords, line->m_Color, static_cast<float>(line->m_Thickness));
 	}
 }
 
 void OverlayRenderer::RenderOverlaysAfterWater(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	PROFILE3_GPU("overlays (after)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render overlays after water");
 
 	RenderTexturedOverlayLines(deviceCommandContext);
 	RenderQuadOverlays(deviceCommandContext);
 	RenderSphereOverlays(deviceCommandContext);
 }
 
 void OverlayRenderer::RenderTexturedOverlayLines(Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	// Nothing was submitted, so there is no pass to set up.
 	if (m->texlines.empty())
 		return;
 
 	CLOSTexture& losTexture = g_Renderer.GetSceneRenderer().GetScene().GetLOSTexture();
 
 	// Both passes configure the pipeline and uniforms in exactly the same way;
 	// they differ only in the shader defines used to look up the technique and
 	// in which subset of lines (by their always-visible flag) gets drawn.
 	const auto renderPass = [&](auto& shaderDefines, const bool alwaysVisible)
 	{
 		CShaderTechniquePtr technique = GetOverlayLineShaderTechnique(shaderDefines);
 		if (!technique)
 			return;
 
 		// Alpha-blended, no depth writes; optionally drawn as wireframe.
 		Renderer::Backend::GraphicsPipelineStateDesc stateDesc =
 			technique->GetGraphicsPipelineStateDesc();
 		stateDesc.depthStencilState.depthWriteEnabled = false;
 		auto& blendState = stateDesc.blendState;
 		blendState.enabled = true;
 		blendState.srcAlphaBlendFactor = Renderer::Backend::BlendFactor::SRC_ALPHA;
 		blendState.srcColorBlendFactor = Renderer::Backend::BlendFactor::SRC_ALPHA;
 		blendState.dstAlphaBlendFactor = Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 		blendState.dstColorBlendFactor = Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 		blendState.alphaBlendOp = Renderer::Backend::BlendOp::ADD;
 		blendState.colorBlendOp = Renderer::Backend::BlendOp::ADD;
 		if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 			stateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 		deviceCommandContext->SetGraphicsPipelineState(stateDesc);
 		deviceCommandContext->BeginPass();
 
 		Renderer::Backend::IShaderProgram* program = technique->GetShader();
 
 		// TODO: losTex and losTransform are unused in the always visible shader; see if these can be safely omitted
 		deviceCommandContext->SetTexture(
 			program->GetBindingSlot(str_losTex), losTexture.GetTexture());
 
 		const CMatrix3D viewProjection =
 			g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection();
 		deviceCommandContext->SetUniform(
 			program->GetBindingSlot(str_transform), viewProjection.AsFloatArray());
 		deviceCommandContext->SetUniform(
 			program->GetBindingSlot(str_losTransform),
 			losTexture.GetTextureMatrix()[0], losTexture.GetTextureMatrix()[12]);
 
 		// Batch render only the lines whose always-visible flag matches this pass.
 		RenderTexturedOverlayLines(deviceCommandContext, program, alwaysVisible);
 
 		deviceCommandContext->EndPass();
 	};
 
 	// First the regular lines with the normal shader, then the always-visible
 	// lines with the LoS-ignoring shader.
 	renderPass(m->defsOverlayLineNormal, false);
 	renderPass(m->defsOverlayLineAlwaysVisible, true);
 }
 
 void OverlayRenderer::RenderTexturedOverlayLines(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	Renderer::Backend::IShaderProgram* shader, bool alwaysVisible)
 {
 	for (SOverlayTexturedLine* texturedLine : m->texlines)
 	{
 		// A line without render data has nothing to draw.
 		if (!texturedLine->m_RenderData)
 			continue;
 
 		// Only draw the lines that belong to the requested visibility pass.
 		if (texturedLine->m_AlwaysVisible != alwaysVisible)
 			continue;
 
 		ENSURE(texturedLine->m_RenderData);
 		texturedLine->m_RenderData->Render(deviceCommandContext, *texturedLine, shader);
 	}
 }
 
 /**
  * Draws every non-empty batch in m->quadBatchMap inside one alpha-blended
  * pass (no depth writes, optional wireframe). Each batch binds its own base
  * and mask textures and is issued as a single indexed draw.
  * Returns early when there are no batches or when the quad overlay shader
  * technique is unavailable.
  * NOTE(review): the quad vertex/index arrays are presumably uploaded before
  * this is called (the header declares an Upload step that must precede
  * rendering) - confirm against the caller.
  */
 void OverlayRenderer::RenderQuadOverlays(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	if (m->quadBatchMap.empty())
 		return;
 
 	CShaderTechniquePtr shaderTech = GetOverlayLineShaderTechnique(m->defsQuadOverlay);
 
 	if (!shaderTech)
 		return;
 
 	// Standard alpha blending without depth writes.
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		shaderTech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 		pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 	deviceCommandContext->BeginPass();
 
 	Renderer::Backend::IShaderProgram* shader = shaderTech->GetShader();
 
 	CLOSTexture& los = g_Renderer.GetSceneRenderer().GetScene().GetLOSTexture();
 
 	// Per-pass bindings shared by all batches: LOS texture, LOS transform and
 	// the view-projection matrix.
 	deviceCommandContext->SetTexture(
 		shader->GetBindingSlot(str_losTex), los.GetTexture());
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_losTransform),
 		los.GetTextureMatrix()[0], los.GetTextureMatrix()[12]);
 
 	const CMatrix3D transform =
 		g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection();
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_transform), transform.AsFloatArray());
 
-	m->quadVertices.UploadIfNeeded(deviceCommandContext);
-	m->quadIndices.UploadIfNeeded(deviceCommandContext);
-
 	// Byte offset of this renderer's first vertex inside the vertex buffer.
 	const uint32_t vertexStride = m->quadVertices.GetStride();
 	const uint32_t firstVertexOffset = m->quadVertices.GetOffset() * vertexStride;
 
 	// Looked up once; the slots do not change between batches.
 	const int32_t baseTexBindingSlot = shader->GetBindingSlot(str_baseTex);
 	const int32_t maskTexBindingSlot = shader->GetBindingSlot(str_maskTex);
 
 	for (OverlayRendererInternals::QuadBatchMap::iterator it = m->quadBatchMap.begin(); it != m->quadBatchMap.end(); ++it)
 	{
 		QuadBatchData& batchRenderData = it->second;
 		const size_t batchNumQuads = batchRenderData.m_NumRenderQuads;
 
 		// Skip batches that have nothing to render.
 		if (batchNumQuads == 0)
 			continue;
 
 		// The batch key carries the base texture and the mask texture.
 		const QuadBatchKey& maskPair = it->first;
 
 		maskPair.m_Texture->UploadBackendTextureIfNeeded(deviceCommandContext);
 		maskPair.m_TextureMask->UploadBackendTextureIfNeeded(deviceCommandContext);
 
 		deviceCommandContext->SetTexture(
 			baseTexBindingSlot, maskPair.m_Texture->GetBackendTexture());
 		deviceCommandContext->SetTexture(
 			maskTexBindingSlot, maskPair.m_TextureMask->GetBackendTexture());
 
 		// TODO: move setting format out of the loop, we might want move the offset
 		// to the index offset when it's supported.
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::POSITION,
 			m->quadAttributePos.format, m->quadAttributePos.offset, vertexStride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::COLOR,
 			m->quadAttributeColor.format, m->quadAttributeColor.offset, vertexStride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV0,
 			m->quadAttributeUV.format, m->quadAttributeUV.offset, vertexStride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		// UV1 is fed from the same attribute (format and offset) as UV0.
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV1,
 			m->quadAttributeUV.format, m->quadAttributeUV.offset, vertexStride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 		deviceCommandContext->SetVertexBuffer(
 			0, m->quadVertices.GetBuffer(), firstVertexOffset);
 		deviceCommandContext->SetIndexBuffer(m->quadIndices.GetBuffer());
 
 		// Six indices (two triangles) are drawn per quad.
 		deviceCommandContext->DrawIndexed(m->quadIndices.GetOffset() + batchRenderData.m_IndicesBase, batchNumQuads * 6, 0);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		g_Renderer.GetStats().m_OverlayTris += batchNumQuads*2;
 	}
 
 	deviceCommandContext->EndPass();
 }
 
 void OverlayRenderer::RenderForegroundOverlays(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CCamera& viewCamera)
 {
 	PROFILE3_GPU("overlays (fg)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render foreground overlays");
 
 	// Billboard axes derived from the orientation of the supplied camera.
 	const CVector3D right = -viewCamera.GetOrientation().GetLeft();
 	const CVector3D up = viewCamera.GetOrientation().GetUp();
 
 	CShaderTechniquePtr technique = g_Renderer.GetShaderManager().LoadEffect(str_foreground_overlay);
 
 	// Sprites skip the depth test and are alpha-blended; optionally wireframe.
 	Renderer::Backend::GraphicsPipelineStateDesc stateDesc =
 		technique->GetGraphicsPipelineStateDesc();
 	stateDesc.depthStencilState.depthTestEnabled = false;
 	auto& blendState = stateDesc.blendState;
 	blendState.enabled = true;
 	blendState.srcAlphaBlendFactor = Renderer::Backend::BlendFactor::SRC_ALPHA;
 	blendState.srcColorBlendFactor = Renderer::Backend::BlendFactor::SRC_ALPHA;
 	blendState.dstAlphaBlendFactor = Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	blendState.dstColorBlendFactor = Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	blendState.alphaBlendOp = Renderer::Backend::BlendOp::ADD;
 	blendState.colorBlendOp = Renderer::Backend::BlendOp::ADD;
 	if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 		stateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	deviceCommandContext->SetGraphicsPipelineState(stateDesc);
 	deviceCommandContext->BeginPass();
 
 	Renderer::Backend::IShaderProgram* program = technique->GetShader();
 
 	const CMatrix3D viewProjection =
 		g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection();
 	deviceCommandContext->SetUniform(
 		program->GetBindingSlot(str_transform), viewProjection.AsFloatArray());
 
 	// Texture coordinates of the two triangles; identical for every sprite,
 	// so they are supplied once through vertex stream 1.
 	const CVector2D quadUVs[6] =
 	{
 		{0.0f, 1.0f},
 		{1.0f, 1.0f},
 		{1.0f, 0.0f},
 		{0.0f, 1.0f},
 		{1.0f, 0.0f},
 		{0.0f, 0.0f},
 	};
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT, 0, sizeof(float) * 3,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::UV0,
 		Renderer::Backend::Format::R32G32_SFLOAT, 0, sizeof(float) * 2,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 1);
 
 	deviceCommandContext->SetVertexBufferData(
 		1, &quadUVs[0], std::size(quadUVs) * sizeof(quadUVs[0]));
 
 	const int32_t baseTexSlot = program->GetBindingSlot(str_baseTex);
 	const int32_t colorMulSlot = program->GetBindingSlot(str_colorMul);
 
 	for (size_t idx = 0; idx < m->sprites.size(); ++idx)
 	{
 		SOverlaySprite* current = m->sprites[idx];
 
 		// Rebind the texture only when it differs from the previous sprite's.
 		const bool needsTextureBind =
 			idx == 0 || current->m_Texture != m->sprites[idx - 1]->m_Texture;
 		if (needsTextureBind)
 		{
 			current->m_Texture->UploadBackendTextureIfNeeded(deviceCommandContext);
 			deviceCommandContext->SetTexture(
 				baseTexSlot, current->m_Texture->GetBackendTexture());
 		}
 
 		deviceCommandContext->SetUniform(
 			colorMulSlot, current->m_Color.AsFloatArray());
 
 		// World-space corner for the given offsets along the billboard axes.
 		const auto cornerAt = [&](const auto x, const auto y)
 		{
 			return current->m_Position + right*x + up*y;
 		};
 		const CVector3D corner00 = cornerAt(current->m_X0, current->m_Y0);
 		const CVector3D corner10 = cornerAt(current->m_X1, current->m_Y0);
 		const CVector3D corner11 = cornerAt(current->m_X1, current->m_Y1);
 		const CVector3D corner01 = cornerAt(current->m_X0, current->m_Y1);
 
 		// Two triangles: (00, 10, 11) and (00, 11, 01).
 		const CVector3D quadPositions[6] =
 		{
 			corner00,
 			corner10,
 			corner11,
 			corner00,
 			corner11,
 			corner01
 		};
 
 		deviceCommandContext->SetVertexBufferData(
 			0, &quadPositions[0].X, std::size(quadPositions) * sizeof(quadPositions[0]));
 
 		deviceCommandContext->Draw(0, 6);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		g_Renderer.GetStats().m_OverlayTris += 2;
 	}
 
 	deviceCommandContext->EndPass();
 }
 
 static void TessellateSphereFace(const CVector3D& a, u16 ai,
 								 const CVector3D& b, u16 bi,
 								 const CVector3D& c, u16 ci,
 								 std::vector<float>& vertexes, std::vector<u16>& indexes, int level)
 {
 	if (level == 0)
 	{
 		indexes.push_back(ai);
 		indexes.push_back(bi);
 		indexes.push_back(ci);
 	}
 	else
 	{
 		CVector3D d = (a + b).Normalized();
 		CVector3D e = (b + c).Normalized();
 		CVector3D f = (c + a).Normalized();
 		int di = vertexes.size() / 3; vertexes.push_back(d.X); vertexes.push_back(d.Y); vertexes.push_back(d.Z);
 		int ei = vertexes.size() / 3; vertexes.push_back(e.X); vertexes.push_back(e.Y); vertexes.push_back(e.Z);
 		int fi = vertexes.size() / 3; vertexes.push_back(f.X); vertexes.push_back(f.Y); vertexes.push_back(f.Z);
 		TessellateSphereFace(a,ai, d,di, f,fi, vertexes, indexes, level-1);
 		TessellateSphereFace(d,di, b,bi, e,ei, vertexes, indexes, level-1);
 		TessellateSphereFace(f,fi, e,ei, c,ci, vertexes, indexes, level-1);
 		TessellateSphereFace(d,di, e,ei, f,fi, vertexes, indexes, level-1);
 	}
 }
 
 static void TessellateSphere(std::vector<float>& vertexes, std::vector<u16>& indexes, int level)
 {
 	/* Start with a tetrahedron, then tessellate */
 	float s = sqrtf(0.5f);
 #define VERT(a,b,c) vertexes.push_back(a); vertexes.push_back(b); vertexes.push_back(c);
 	VERT(-s,  0, -s);
 	VERT( s,  0, -s);
 	VERT( s,  0,  s);
 	VERT(-s,  0,  s);
 	VERT( 0, -1,  0);
 	VERT( 0,  1,  0);
 #define FACE(a,b,c) \
 	TessellateSphereFace( \
 		CVector3D(vertexes[a*3], vertexes[a*3+1], vertexes[a*3+2]), a, \
 		CVector3D(vertexes[b*3], vertexes[b*3+1], vertexes[b*3+2]), b, \
 		CVector3D(vertexes[c*3], vertexes[c*3+1], vertexes[c*3+2]), c, \
 		vertexes, indexes, level);
 	FACE(0,4,1);
 	FACE(1,4,2);
 	FACE(2,4,3);
 	FACE(3,4,0);
 	FACE(1,5,0);
 	FACE(2,5,1);
 	FACE(3,5,2);
 	FACE(0,5,3);
 #undef FACE
 #undef VERT
 }
 
 void OverlayRendererInternals::GenerateSphere()
 {
 	// The mesh is built once and reused on every later call.
 	if (!sphereVertexes.empty())
 		return;
 
 	const int tessellationLevel = 3;
 	TessellateSphere(sphereVertexes, sphereIndexes, tessellationLevel);
 }
 
 /**
  * Draws every submitted sphere overlay in one alpha-blended pass without
  * depth writes. All spheres share a single mesh (built on first use by
  * GenerateSphere); each sphere is scaled by its radius and moved to its
  * center through the instancingTransform uniform, and tinted through the
  * color uniform. Does nothing when no spheres were submitted.
  */
 void OverlayRenderer::RenderSphereOverlays(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	PROFILE3_GPU("overlays (spheres)");
 
 	if (m->spheres.empty())
 		return;
 
 	CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(str_overlay_solid);
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		tech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 	deviceCommandContext->BeginPass();
 
 	Renderer::Backend::IShaderProgram* shader = tech->GetShader();
 
 	const CMatrix3D transform =
 		g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection();
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_transform), transform.AsFloatArray());
 
 	// Make sure the shared sphere mesh exists before handing it to the backend.
 	m->GenerateSphere();
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT, 0, 0,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 	deviceCommandContext->SetVertexBufferData(
 		0, m->sphereVertexes.data(), m->sphereVertexes.size() * sizeof(m->sphereVertexes[0]));
 	deviceCommandContext->SetIndexBufferData(
 		m->sphereIndexes.data(), m->sphereIndexes.size() * sizeof(m->sphereIndexes[0]));
 
 	// The binding slots are the same for every sphere, so look them up once.
 	const int32_t instancingTransformBindingSlot =
 		shader->GetBindingSlot(str_instancingTransform);
 	const int32_t colorBindingSlot = shader->GetBindingSlot(str_color);
 
 	for (size_t i = 0; i < m->spheres.size(); ++i)
 	{
 		SOverlaySphere* sphere = m->spheres[i];
 
 		// Scale the shared mesh to the sphere's radius, then move it to its center.
 		CMatrix3D instancingTransform;
 		instancingTransform.SetIdentity();
 		instancingTransform.Scale(
 			sphere->m_Radius, sphere->m_Radius, sphere->m_Radius);
 		instancingTransform.Translate(sphere->m_Center);
 
 		deviceCommandContext->SetUniform(
 			instancingTransformBindingSlot,
 			instancingTransform.AsFloatArray());
 
 		deviceCommandContext->SetUniform(
 			colorBindingSlot, sphere->m_Color.AsFloatArray());
 
 		deviceCommandContext->DrawIndexed(0, m->sphereIndexes.size(), 0);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		// Accumulate rather than assign: the counter is shared with the other
 		// overlay passes and must also include every sphere drawn here.
 		g_Renderer.GetStats().m_OverlayTris += m->sphereIndexes.size()/3;
 	}
 
 	deviceCommandContext->EndPass();
 }
Index: ps/trunk/source/renderer/OverlayRenderer.h
===================================================================
--- ps/trunk/source/renderer/OverlayRenderer.h	(revision 27181)
+++ ps/trunk/source/renderer/OverlayRenderer.h	(revision 27182)
@@ -1,159 +1,166 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef INCLUDED_OVERLAYRENDERER
 #define INCLUDED_OVERLAYRENDERER
 
 #include "graphics/ShaderProgram.h"
 #include "renderer/backend/IDeviceCommandContext.h"
 
 struct SOverlayLine;
 struct SOverlayTexturedLine;
 struct SOverlaySprite;
 struct SOverlayQuad;
 struct SOverlaySphere;
 class CCamera;
 
 struct OverlayRendererInternals;
 
 /**
  * Class OverlayRenderer: Render various bits of data that overlay the
  * game world (selection circles, health bars, etc).
  */
 class OverlayRenderer
 {
 	NONCOPYABLE(OverlayRenderer);
 
 public:
 	OverlayRenderer();
 	~OverlayRenderer();
 
 	/**
 	 * Performs one-time initialization. Called by CRenderer::Open after graphics
 	 * capabilities and the shader path have been determined.
 	 */
 	void Initialize();
 
 	/**
 	 * Add a line overlay for rendering in this frame.
 	 * @param overlay Must be non-null. The pointed-to object must remain valid at least
 	 *                until the end of the frame.
 	 */
 	void Submit(SOverlayLine* overlay);
 
 	/**
 	 * Add a textured line overlay for rendering in this frame.
 	 * @param overlay Must be non-null. The pointed-to object must remain valid at least
 	 *                until the end of the frame.
 	 */
 	void Submit(SOverlayTexturedLine* overlay);
 
 	/**
 	 * Add a sprite overlay for rendering in this frame.
 	 * @param overlay Must be non-null. The pointed-to object must remain valid at least
 	 *                until the end of the frame.
 	 */
 	void Submit(SOverlaySprite* overlay);
 
 	/**
 	 * Add a textured quad overlay for rendering in this frame.
 	 * @param overlay Must be non-null. The pointed-to object must remain valid at least
 	 *                until the end of the frame.
 	 */
 	void Submit(SOverlayQuad* overlay);
 
 	/**
 	 * Add a sphere overlay for rendering in this frame.
 	 * @param overlay Must be non-null. The pointed-to object must remain valid at least
 	 *                until the end of the frame.
 	 */
 	void Submit(SOverlaySphere* overlay);
 
 	/**
 	 * Prepare internal data structures for rendering.
 	 * Must be called after all Submit calls for a frame, and before
-	 * any rendering calls.
+	 * uploading data.
 	 */
 	void PrepareForRendering();
 
 	/**
+	 * Upload prepared data to backend.
+	 * Must be called after preparing, and before any rendering calls.
+	 */
+	void Upload(
+		Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
+
+	/**
 	 * Reset the list of submitted overlays.
 	 */
 	void EndFrame();
 
 	/**
 	 * Render all the submitted overlays that are embedded in the world
 	 * (i.e. rendered behind other objects in the normal 3D way)
 	 * and should be drawn before water (i.e. may be visible under the water)
 	 */
 	void RenderOverlaysBeforeWater(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
 
 	/**
 	 * Render all the submitted overlays that are embedded in the world
 	 * (i.e. rendered behind other objects in the normal 3D way)
 	 * and should be drawn after water (i.e. may be visible on top of the water)
 	 */
 	void RenderOverlaysAfterWater(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
 
 	/**
 	 * Render all the submitted overlays that should appear on top of everything
 	 * in the world.
 	 * @param viewCamera camera to be used for billboard computations
 	 */
 	void RenderForegroundOverlays(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const CCamera& viewCamera);
 
 	/// Small vertical offset of overlays from terrain to prevent visual glitches
 	static const float OVERLAY_VOFFSET;
 
 private:
 
 	/**
 	 * Helper method; renders all overlay lines currently registered in the internals. Batch-
 	 * renders textured overlay lines batched according to their visibility status by delegating
 	 * to the RenderTexturedOverlayLines overload that takes a shader and an alwaysVisible flag.
 	 */
 	void RenderTexturedOverlayLines(Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
 
 	/**
 	 * Helper method; renders those overlay lines currently registered in the internals (i.e.
 	 * in m->texlines) for which the 'always visible' flag equals @p alwaysVisible. Used for
 	 * batch rendering the overlay lines according to their alwaysVisible status, as this
 	 * requires a separate shader to be used.
 	 */
 	void RenderTexturedOverlayLines(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		Renderer::Backend::IShaderProgram* shader, bool alwaysVisible);
 
 	/**
 	 * Helper method; batch-renders all registered quad overlays, batched by their texture for efficiency.
 	 */
 	void RenderQuadOverlays(Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
 
 	/**
 	 * Helper method; batch-renders all registered sphere overlays.
 	 */
 	 void RenderSphereOverlays(Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
 
 private:
 	/// Pointer to the renderer's internal state (see OverlayRendererInternals).
 	OverlayRendererInternals* m;
 };
 
 #endif // INCLUDED_OVERLAYRENDERER
Index: ps/trunk/source/renderer/PatchRData.cpp
===================================================================
--- ps/trunk/source/renderer/PatchRData.cpp	(revision 27181)
+++ ps/trunk/source/renderer/PatchRData.cpp	(revision 27182)
@@ -1,1559 +1,1558 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/PatchRData.h"
 
 #include "graphics/GameView.h"
 #include "graphics/LightEnv.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/Patch.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TerrainTextureEntry.h"
 #include "graphics/TextRenderer.h"
 #include "graphics/TextureManager.h"
 #include "lib/allocators/DynamicArena.h"
 #include "lib/allocators/STLAllocators.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/GameSetup/Config.h"
 #include "ps/Profile.h"
 #include "ps/Pyrogenesis.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/AlphaMapCalculator.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/TerrainRenderer.h"
 #include "renderer/WaterManager.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/Simulation2.h"
 
 #include <algorithm>
 #include <numeric>
 #include <set>
 
 // Offsets of the tiles in the 3x3 square around a tile, used when building
 // terrain blends. Each row is { z offset, x offset }: column 1 is added to the
 // tile's x coordinate and column 0 to its z coordinate.
 // Rows 0-7 are the eight neighbours; row 8 ({0, 0}) is the tile itself.
 const ssize_t BlendOffsets[9][2] = {
 	{  0, -1 },
 	{ -1, -1 },
 	{ -1,  0 },
 	{ -1,  1 },
 	{  0,  1 },
 	{  1,  1 },
 	{  1,  0 },
 	{  1, -1 },
 	{  0,  0 }
 };
 
 /**
  * Creates the render data for @p patch and builds it immediately.
  * @param patch Must be non-null.
  */
 CPatchRData::CPatchRData(CPatch* patch, CSimulation2* simulation)
 	: m_Patch(patch)
 	, m_Simulation(simulation)
 {
 	// Render data cannot exist without a patch to describe.
 	ENSURE(patch);
 
 	Build();
 }
 
 CPatchRData::~CPatchRData() = default;
 
 /**
  * Represents a blend for a single tile, texture and shape.
  */
 struct STileBlend
 {
 	CTerrainTextureEntry* m_Texture;
 	int m_Priority;
 	u16 m_TileMask; // bit n set if this blend contains neighbour tile BlendOffsets[n]
 
 	struct DecreasingPriority
 	{
 		bool operator()(const STileBlend& a, const STileBlend& b) const
 		{
 			if (a.m_Priority > b.m_Priority)
 				return true;
 			if (a.m_Priority < b.m_Priority)
 				return false;
 			if (a.m_Texture && b.m_Texture)
 				return a.m_Texture->GetTag() > b.m_Texture->GetTag();
 			return false;
 		}
 	};
 
 	struct CurrentTile
 	{
 		bool operator()(const STileBlend& a) const
 		{
 			return (a.m_TileMask & (1 << 8)) != 0;
 		}
 	};
 };
 
 /**
  * Represents the ordered collection of blends drawn on a particular tile.
  */
 struct STileBlendStack
 {
 	// Tile coordinates within the patch (filled from the 0..PATCH_SIZE-1 loop
 	// indices in BuildBlends).
 	u8 i, j;
 	std::vector<STileBlend> blends; // back of vector is lowest-priority texture
 };
 
 /**
  * Represents a batched collection of blends using the same texture.
  */
 struct SBlendLayer
 {
 	struct Tile
 	{
 		// Tile coordinates within the patch, copied from the tile's blend stack.
 		u8 i, j;
 		// Low 8 bits of the blend's tile mask: one bit per neighbour tile
 		// (BlendOffsets[0..7]) that contributes this texture.
 		u8 shape;
 	};
 
 	// Texture shared by every tile in this layer.
 	CTerrainTextureEntry* m_Texture;
 	std::vector<Tile> m_Tiles;
 };
 
 /**
  * Rebuilds the blend splats (m_BlendSplats) and their vertex/index buffer
  * chunks (m_VBBlends, m_VBBlendIndices) for this patch.
  *
  * Steps:
  *  1. For every tile, collect the textures of the 3x3 surrounding tiles, sort
  *     them by priority and keep only those that rank above the tile's own
  *     texture (the per-tile blend stack).
  *  2. Group the per-tile stacks into layers that each use a single texture.
  *  3. Emit one splat per layer, generating geometry through AddBlend.
  *  4. Release the old buffer chunks and, if any geometry was produced,
  *     allocate and fill new ones.
  */
 void CPatchRData::BuildBlends()
 {
 	PROFILE3("build blends");
 
 	m_BlendSplats.clear();
 
 	std::vector<SBlendVertex> blendVertices;
 	std::vector<u16> blendIndices;
 
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	std::vector<STileBlendStack> blendStacks;
 	blendStacks.reserve(PATCH_SIZE*PATCH_SIZE);
 
 	// Scratch list reused for every tile; at most one entry per 3x3 neighbour.
 	std::vector<STileBlend> blends;
 	blends.reserve(9);
 
 	// For each tile in patch ..
 	for (ssize_t j = 0; j < PATCH_SIZE; ++j)
 	{
 		for (ssize_t i = 0; i < PATCH_SIZE; ++i)
 		{
 			// Tile coordinates in the whole terrain.
 			ssize_t gx = m_Patch->m_X * PATCH_SIZE + i;
 			ssize_t gz = m_Patch->m_Z * PATCH_SIZE + j;
 
 			blends.clear();
 
 			// Compute a blend for every tile in the 3x3 square around this tile
 			for (size_t n = 0; n < 9; ++n)
 			{
 				ssize_t ox = gx + BlendOffsets[n][1];
 				ssize_t oz = gz + BlendOffsets[n][0];
 
 				// Tiles for which the terrain returns no mini-patch are skipped.
 				CMiniPatch* nmp = terrain->GetTile(ox, oz);
 				if (!nmp)
 					continue;
 
 				STileBlend blend;
 				blend.m_Texture = nmp->GetTextureEntry();
 				blend.m_Priority = nmp->GetPriority();
 				blend.m_TileMask = 1 << n;
 				blends.push_back(blend);
 			}
 
 			// Sort the blends, highest priority first
 			std::sort(blends.begin(), blends.end(), STileBlend::DecreasingPriority());
 
 			STileBlendStack blendStack;
 			blendStack.i = i;
 			blendStack.j = j;
 
 			// Put the blends into the tile's stack, merging any adjacent blends with the same texture
 			for (size_t k = 0; k < blends.size(); ++k)
 			{
 				if (!blendStack.blends.empty() && blendStack.blends.back().m_Texture == blends[k].m_Texture)
 					blendStack.blends.back().m_TileMask |= blends[k].m_TileMask;
 				else
 					blendStack.blends.push_back(blends[k]);
 			}
 
 			// Remove blends that are after (i.e. lower priority than) the current tile
 			// (including the current tile), since we don't want to render them on top of
 			// the tile's base texture
 			blendStack.blends.erase(
 				std::find_if(blendStack.blends.begin(), blendStack.blends.end(), STileBlend::CurrentTile()),
 				blendStack.blends.end());
 
 			blendStacks.push_back(blendStack);
 		}
 	}
 
 	// Given the blend stack per tile, we want to batch together as many blends as possible.
 	// Group them into a series of layers (each of which has a single texture):
 	// (This is effectively a topological sort / linearisation of the partial order induced
 	// by the per-tile stacks, preferring to make tiles with equal textures adjacent.)
 
 	std::vector<SBlendLayer> blendLayers;
 
 	// Each iteration fills the most recent layer (if any) and then either
 	// starts a new layer or stops once every stack has been emptied.
 	while (true)
 	{
 		if (!blendLayers.empty())
 		{
 			// Try to grab as many tiles as possible that match our current layer,
 			// from off the blend stacks of all the tiles
 
 			CTerrainTextureEntry* tex = blendLayers.back().m_Texture;
 
 			for (size_t k = 0; k < blendStacks.size(); ++k)
 			{
 				if (!blendStacks[k].blends.empty() && blendStacks[k].blends.back().m_Texture == tex)
 				{
 					// The cast keeps only the eight neighbour bits of the mask.
 					SBlendLayer::Tile t = { blendStacks[k].i, blendStacks[k].j, (u8)blendStacks[k].blends.back().m_TileMask };
 					blendLayers.back().m_Tiles.push_back(t);
 					blendStacks[k].blends.pop_back();
 				}
 				// (We've already merged adjacent entries of the same texture in each stack,
 				// so we don't need to bother looping to check the next entry in this stack again)
 			}
 		}
 
 		// We've grabbed as many tiles as possible; now we need to start a new layer.
 		// The new layer's texture could come from the back of any non-empty stack;
 		// choose the longest stack as a heuristic to reduce the number of layers
 		CTerrainTextureEntry* bestTex = NULL;
 		size_t bestStackSize = 0;
 
 		for (size_t k = 0; k < blendStacks.size(); ++k)
 		{
 			if (blendStacks[k].blends.size() > bestStackSize)
 			{
 				bestStackSize = blendStacks[k].blends.size();
 				bestTex = blendStacks[k].blends.back().m_Texture;
 			}
 		}
 
 		// If all our stacks were empty, we're done
 		if (bestStackSize == 0)
 			break;
 
 		// Otherwise add the new layer, then loop back and start filling it in
 
 		SBlendLayer layer;
 		layer.m_Texture = bestTex;
 		blendLayers.push_back(layer);
 	}
 
 	// Now build outgoing splats
 	m_BlendSplats.resize(blendLayers.size());
 
 	for (size_t k = 0; k < blendLayers.size(); ++k)
 	{
 		// Each splat covers the contiguous index range produced by its layer.
 		SSplat& splat = m_BlendSplats[k];
 		splat.m_IndexStart = blendIndices.size();
 		splat.m_Texture = blendLayers[k].m_Texture;
 
 		for (size_t t = 0; t < blendLayers[k].m_Tiles.size(); ++t)
 		{
 			SBlendLayer::Tile& tile = blendLayers[k].m_Tiles[t];
 			AddBlend(blendVertices, blendIndices, tile.i, tile.j, tile.shape, splat.m_Texture);
 		}
 
 		splat.m_IndexCount = blendIndices.size() - splat.m_IndexStart;
 	}
 
 	// Release existing vertex buffer chunks
 	m_VBBlends.Reset();
 	m_VBBlendIndices.Reset();
 
 	// With no blend geometry the chunks simply stay released.
 	if (blendVertices.size())
 	{
 		// Construct vertex buffer
 
 		m_VBBlends = g_VBMan.AllocateChunk(
 			sizeof(SBlendVertex), blendVertices.size(),
 			Renderer::Backend::IBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::TERRAIN);
 		m_VBBlends->m_Owner->UpdateChunkVertices(m_VBBlends.Get(), &blendVertices[0]);
 
 		// Update the indices to include the base offset of the vertex data
 		for (size_t k = 0; k < blendIndices.size(); ++k)
 			blendIndices[k] += static_cast<u16>(m_VBBlends->m_Index);
 
 		m_VBBlendIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), blendIndices.size(),
 			Renderer::Backend::IBuffer::Type::INDEX, false,
 			nullptr, CVertexBufferManager::Group::TERRAIN);
 		m_VBBlendIndices->m_Owner->UpdateChunkVertices(m_VBBlendIndices.Get(), &blendIndices[0]);
 	}
 }
 
 void CPatchRData::AddBlend(std::vector<SBlendVertex>& blendVertices, std::vector<u16>& blendIndices,
 			   u16 i, u16 j, u8 shape, CTerrainTextureEntry* texture)
 {
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	ssize_t gx = m_Patch->m_X * PATCH_SIZE + i;
 	ssize_t gz = m_Patch->m_Z * PATCH_SIZE + j;
 
 	// uses the current neighbour texture
 	BlendShape8 shape8;
 	for (size_t m = 0; m < 8; ++m)
 		shape8[m] = (shape & (1 << m)) ? 0 : 1;
 
 	// calculate the required alphamap and the required rotation of the alphamap from blendshape
 	unsigned int alphamapflags;
 	int alphamap = CAlphaMapCalculator::Calculate(shape8, alphamapflags);
 
 	// now actually render the blend tile (if we need one)
 	if (alphamap == -1)
 		return;
 
 	float u0 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].u0;
 	float u1 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].u1;
 	float v0 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].v0;
 	float v1 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].v1;
 
 	if (alphamapflags & BLENDMAP_FLIPU)
 		std::swap(u0, u1);
 
 	if (alphamapflags & BLENDMAP_FLIPV)
 		std::swap(v0, v1);
 
 	int base = 0;
 	if (alphamapflags & BLENDMAP_ROTATE90)
 		base = 1;
 	else if (alphamapflags & BLENDMAP_ROTATE180)
 		base = 2;
 	else if (alphamapflags & BLENDMAP_ROTATE270)
 		base = 3;
 
 	SBlendVertex vtx[4];
 	vtx[(base + 0) % 4].m_AlphaUVs[0] = u0;
 	vtx[(base + 0) % 4].m_AlphaUVs[1] = v0;
 	vtx[(base + 1) % 4].m_AlphaUVs[0] = u1;
 	vtx[(base + 1) % 4].m_AlphaUVs[1] = v0;
 	vtx[(base + 2) % 4].m_AlphaUVs[0] = u1;
 	vtx[(base + 2) % 4].m_AlphaUVs[1] = v1;
 	vtx[(base + 3) % 4].m_AlphaUVs[0] = u0;
 	vtx[(base + 3) % 4].m_AlphaUVs[1] = v1;
 
 	SBlendVertex dst;
 
 	CVector3D normal;
 
 	u16 index = static_cast<u16>(blendVertices.size());
 
 	terrain->CalcPosition(gx, gz, dst.m_Position);
 	terrain->CalcNormal(gx, gz, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[0].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[0].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	terrain->CalcPosition(gx + 1, gz, dst.m_Position);
 	terrain->CalcNormal(gx + 1, gz, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[1].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[1].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	terrain->CalcPosition(gx + 1, gz + 1, dst.m_Position);
 	terrain->CalcNormal(gx + 1, gz + 1, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[2].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[2].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	terrain->CalcPosition(gx, gz + 1, dst.m_Position);
 	terrain->CalcNormal(gx, gz + 1, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[3].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[3].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	bool dir = terrain->GetTriangulationDir(gx, gz);
 	if (dir)
 	{
 		blendIndices.push_back(index+0);
 		blendIndices.push_back(index+1);
 		blendIndices.push_back(index+3);
 
 		blendIndices.push_back(index+1);
 		blendIndices.push_back(index+2);
 		blendIndices.push_back(index+3);
 	}
 	else
 	{
 		blendIndices.push_back(index+0);
 		blendIndices.push_back(index+1);
 		blendIndices.push_back(index+2);
 
 		blendIndices.push_back(index+2);
 		blendIndices.push_back(index+3);
 		blendIndices.push_back(index+0);
 	}
 }
 
 void CPatchRData::BuildIndices()
 {
 	PROFILE3("build indices");
 
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	ssize_t px = m_Patch->m_X * PATCH_SIZE;
 	ssize_t pz = m_Patch->m_Z * PATCH_SIZE;
 
 	// must have allocated some vertices before trying to build corresponding indices
 	ENSURE(m_VBBase);
 
 	// number of vertices in each direction in each patch
 	ssize_t vsize=PATCH_SIZE+1;
 
 	// PATCH_SIZE must be 2^8-2 or less to not overflow u16 indices buffer. Thankfully this is always true.
 	ENSURE(vsize*vsize < 65536);
 
 	std::vector<unsigned short> indices;
 	indices.reserve(PATCH_SIZE * PATCH_SIZE * 4);
 
 	// release existing splats
 	m_Splats.clear();
 
 	// build grid of textures on this patch
 	std::vector<CTerrainTextureEntry*> textures;
 	CTerrainTextureEntry* texgrid[PATCH_SIZE][PATCH_SIZE];
 	for (ssize_t j=0;j<PATCH_SIZE;j++) {
 		for (ssize_t i=0;i<PATCH_SIZE;i++) {
 			CTerrainTextureEntry* tex=m_Patch->m_MiniPatches[j][i].GetTextureEntry();
 			texgrid[j][i]=tex;
 			if (std::find(textures.begin(),textures.end(),tex)==textures.end()) {
 				textures.push_back(tex);
 			}
 		}
 	}
 
 	// now build base splats from interior textures
 	m_Splats.resize(textures.size());
 	// build indices for base splats
 	size_t base=m_VBBase->m_Index;
 
 	for (size_t k = 0; k < m_Splats.size(); ++k)
 	{
 		CTerrainTextureEntry* tex = textures[k];
 
 		SSplat& splat=m_Splats[k];
 		splat.m_Texture=tex;
 		splat.m_IndexStart=indices.size();
 
 		for (ssize_t j = 0; j < PATCH_SIZE; j++)
 		{
 			for (ssize_t i = 0; i < PATCH_SIZE; i++)
 			{
 				if (texgrid[j][i] == tex)
 				{
 					bool dir = terrain->GetTriangulationDir(px+i, pz+j);
 					if (dir)
 					{
 						indices.push_back(u16(((j+0)*vsize+(i+0))+base));
 						indices.push_back(u16(((j+0)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+0))+base));
 
 						indices.push_back(u16(((j+0)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+0))+base));
 					}
 					else
 					{
 						indices.push_back(u16(((j+0)*vsize+(i+0))+base));
 						indices.push_back(u16(((j+0)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+1))+base));
 
 						indices.push_back(u16(((j+1)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+0))+base));
 						indices.push_back(u16(((j+0)*vsize+(i+0))+base));
 					}
 				}
 			}
 		}
 		splat.m_IndexCount=indices.size()-splat.m_IndexStart;
 	}
 
 	// Release existing vertex buffer chunk
 	m_VBBaseIndices.Reset();
 
 	ENSURE(indices.size());
 
 	// Construct vertex buffer
 	m_VBBaseIndices = g_VBMan.AllocateChunk(
 		sizeof(u16), indices.size(),
 		Renderer::Backend::IBuffer::Type::INDEX, false, nullptr, CVertexBufferManager::Group::TERRAIN);
 	m_VBBaseIndices->m_Owner->UpdateChunkVertices(m_VBBaseIndices.Get(), &indices[0]);
 }
 
 
 void CPatchRData::BuildVertices()
 {
 	PROFILE3("build vertices");
 
 	// create both vertices and lighting colors
 
 	// number of vertices in each direction in each patch
 	ssize_t vsize = PATCH_SIZE + 1;
 
 	std::vector<SBaseVertex> vertices;
 	vertices.resize(vsize * vsize);
 
 	// get index of this patch
 	ssize_t px = m_Patch->m_X;
 	ssize_t pz = m_Patch->m_Z;
 
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	// build vertices
 	for (ssize_t j = 0; j < vsize; ++j)
 	{
 		for (ssize_t i = 0; i < vsize; ++i)
 		{
 			ssize_t ix = px * PATCH_SIZE + i;
 			ssize_t iz = pz * PATCH_SIZE + j;
 			ssize_t v = j * vsize + i;
 
 			// calculate vertex data
 			terrain->CalcPosition(ix, iz, vertices[v].m_Position);
 
 			CVector3D normal;
 			terrain->CalcNormal(ix, iz, normal);
 			vertices[v].m_Normal = normal;
 		}
 	}
 
 	// upload to vertex buffer
 	if (!m_VBBase)
 	{
 		m_VBBase = g_VBMan.AllocateChunk(
 			sizeof(SBaseVertex), vsize * vsize,
 			Renderer::Backend::IBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::TERRAIN);
 	}
 
 	m_VBBase->m_Owner->UpdateChunkVertices(m_VBBase.Get(), &vertices[0]);
 }
 
 void CPatchRData::BuildSide(std::vector<SSideVertex>& vertices, CPatchSideFlags side)
 {
 	ssize_t vsize = PATCH_SIZE + 1;
 	CTerrain* terrain = m_Patch->m_Parent;
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*m_Simulation, SYSTEM_ENTITY);
 
 	for (ssize_t k = 0; k < vsize; k++)
 	{
 		ssize_t gx = m_Patch->m_X * PATCH_SIZE;
 		ssize_t gz = m_Patch->m_Z * PATCH_SIZE;
 		switch (side)
 		{
 		case CPATCH_SIDE_NEGX: gz += k; break;
 		case CPATCH_SIDE_POSX: gx += PATCH_SIZE; gz += PATCH_SIZE-k; break;
 		case CPATCH_SIDE_NEGZ: gx += PATCH_SIZE-k; break;
 		case CPATCH_SIDE_POSZ: gz += PATCH_SIZE; gx += k; break;
 		}
 
 		CVector3D pos;
 		terrain->CalcPosition(gx, gz, pos);
 
 		// Clamp the height to the water level
 		float waterHeight = 0.f;
 		if (cmpWaterManager)
 			waterHeight = cmpWaterManager->GetExactWaterLevel(pos.X, pos.Z);
 		pos.Y = std::max(pos.Y, waterHeight);
 
 		SSideVertex v0, v1;
 		v0.m_Position = pos;
 		v1.m_Position = pos;
 		v1.m_Position.Y = 0;
 
 		if (k == 0)
 		{
 			vertices.emplace_back(v1);
 			vertices.emplace_back(v0);
 		}
 		if (k > 0)
 		{
 			const size_t lastIndex = vertices.size() - 1;
 			vertices.emplace_back(v1);
 			vertices.emplace_back(vertices[lastIndex]);
 			vertices.emplace_back(v0);
 			vertices.emplace_back(v1);
 			if (k + 1 < vsize)
 			{
 				vertices.emplace_back(v1);
 				vertices.emplace_back(v0);
 			}
 		}
 	}
 }
 
 void CPatchRData::BuildSides()
 {
 	PROFILE3("build sides");
 
 	std::vector<SSideVertex> sideVertices;
 
 	int sideFlags = m_Patch->GetSideFlags();
 
 	// If no sides are enabled, we don't need to do anything
 	if (!sideFlags)
 		return;
 
 	// For each side, generate a tristrip by adding a vertex at ground/water
 	// level and a vertex underneath at height 0.
 
 	if (sideFlags & CPATCH_SIDE_NEGX)
 		BuildSide(sideVertices, CPATCH_SIDE_NEGX);
 
 	if (sideFlags & CPATCH_SIDE_POSX)
 		BuildSide(sideVertices, CPATCH_SIDE_POSX);
 
 	if (sideFlags & CPATCH_SIDE_NEGZ)
 		BuildSide(sideVertices, CPATCH_SIDE_NEGZ);
 
 	if (sideFlags & CPATCH_SIDE_POSZ)
 		BuildSide(sideVertices, CPATCH_SIDE_POSZ);
 
 	if (sideVertices.empty())
 		return;
 
 	if (!m_VBSides)
 	{
 		m_VBSides = g_VBMan.AllocateChunk(
 			sizeof(SSideVertex), sideVertices.size(),
 			Renderer::Backend::IBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::DEFAULT);
 	}
 	m_VBSides->m_Owner->UpdateChunkVertices(m_VBSides.Get(), &sideVertices[0]);
 }
 
 // Build all render data of this patch: vertices, sides, indices, blends and water.
 // BuildIndices requires the base vertex chunk created by BuildVertices, so the
 // vertices are built first.
 // NOTE(review): BuildSide and BuildWater dereference m_Simulation, which is not
 // set here; presumably it is assigned before Build() is called - confirm.
 void CPatchRData::Build()
 {
 	BuildVertices();
 	BuildSides();
 	BuildIndices();
 	BuildBlends();
 	BuildWater();
 }
 
 // Remember the simulation to query and, if any update flag is set, rebuild
 // the whole render data of this patch and clear the flags.
 void CPatchRData::Update(CSimulation2* simulation)
 {
 	m_Simulation = simulation;
 
 	// Nothing was flagged as dirty: keep the existing render data
 	if (m_UpdateFlags == 0)
 		return;
 
 	// TODO,RC 11/04/04 - need to only rebuild necessary bits of renderdata rather
 	// than everything; it's complicated slightly because the blends are dependent
 	// on both vertex and index data
 	BuildVertices();
 	BuildSides();
 	BuildIndices();
 	BuildBlends();
 	BuildWater();
 
 	m_UpdateFlags = 0;
 }
 
 // To minimise the cost of memory allocations, everything used for computing
 // batches uses an arena allocator. (All allocations are short-lived so we can
 // just throw away the whole arena at the end of each frame.)
 
 // Arena type used by the batching containers below.
 using Arena = Allocators::DynamicArena<1 * MiB>;
 
 // std::map types with appropriate arena allocators and default comparison operator
 template<class Key, class Value>
 using PooledBatchMap = std::map<Key, Value, std::less<Key>, ProxyAllocator<std::pair<Key const, Value>, Arena>>;
 
 // Equivalent to "m[k]", when it returns an arena-allocated std::map (since we can't
 // use the default constructor in that case).
 // The nested map is only constructed when the key is missing, so looking up an
 // existing key does not build a temporary arena-backed container.
 template<typename M>
 typename M::mapped_type& PooledMapGet(M& m, const typename M::key_type& k, Arena& arena)
 {
 	// First element whose key is not less than k: either the match itself or
 	// the position in front of which the new element has to be inserted
 	typename M::iterator it = m.lower_bound(k);
 	if (it != m.end() && !m.key_comp()(k, it->first))
 		return it->second;
 
 	return m.insert(it, std::make_pair(k,
 		typename M::mapped_type(typename M::mapped_type::key_compare(), typename M::mapped_type::allocator_type(arena))
 	))->second;
 }
 
 // Equivalent to "m[k]", when it returns a std::pair of arena-allocated std::vectors.
 // The pair of vectors is only constructed when the key is missing, so looking
 // up an existing key does not build temporary arena-backed containers.
 template<typename M>
 typename M::mapped_type& PooledPairGet(M& m, const typename M::key_type& k, Arena& arena)
 {
 	// First element whose key is not less than k: either the match itself or
 	// the position in front of which the new element has to be inserted
 	typename M::iterator it = m.lower_bound(k);
 	if (it != m.end() && !m.key_comp()(k, it->first))
 		return it->second;
 
 	return m.insert(it, std::make_pair(k, std::make_pair(
 			typename M::mapped_type::first_type(typename M::mapped_type::first_type::allocator_type(arena)),
 			typename M::mapped_type::second_type(typename M::mapped_type::second_type::allocator_type(arena))
 	)))->second;
 }
 
 // Each multidraw batch has a list of index counts (.first), and a list of
 // offsets of the first index of each draw (.second). Both lists are filled in
 // lockstep, so entry i of one belongs to entry i of the other.
 using BatchElements = std::pair<std::vector<u32, ProxyAllocator<u32, Arena>>, std::vector<u32, ProxyAllocator<u32, Arena>>>;
 
 // Group batches by index buffer
 using IndexBufferBatches = PooledBatchMap<CVertexBuffer*, BatchElements>;
 
 // Group batches by vertex buffer
 using VertexBufferBatches = PooledBatchMap<CVertexBuffer*, IndexBufferBatches>;
 
 // Group batches by texture
 using TextureBatches = PooledBatchMap<CTerrainTextureEntry*, VertexBufferBatches>;
 
 // Group batches by shaders (effect name plus the material's shader defines).
 using ShaderTechniqueBatches = PooledBatchMap<std::pair<CStrIntern, CShaderDefines>, TextureBatches>;
 
 // Draw the base splats of the given patches.
 // The splats are first sorted into nested batches (shader effect + defines ->
 // texture -> vertex buffer -> index buffer) so that state only has to be
 // changed when moving from one group to the next; then every batch is drawn
 // once per pass of its shader technique.
 //   deviceCommandContext  receives all state changes and draw calls
 //   patches               patches whose m_Splats are drawn
 //   context               shader defines merged with each material's own defines
 //   shadow                forwarded to TerrainRenderer::PrepareShader
 void CPatchRData::RenderBases(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const CShaderDefines& context, ShadowMap* shadow)
 {
 	PROFILE3("render terrain bases");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain bases");
 
 	// All batching containers below allocate from this function-local arena
 	Arena arena;
 
 	ShaderTechniqueBatches batches(ShaderTechniqueBatches::key_compare(), (ShaderTechniqueBatches::allocator_type(arena)));
 
 	PROFILE_START("compute batches");
 
 	// Collect all the patches' base splats into their appropriate batches
 	for (size_t i = 0; i < patches.size(); ++i)
 	{
 		CPatchRData* patch = patches[i];
 		for (size_t j = 0; j < patch->m_Splats.size(); ++j)
 		{
 			SSplat& splat = patch->m_Splats[j];
 			const CMaterial& material = splat.m_Texture->GetMaterial();
 			// Splats whose material names no shader effect are logged and skipped
 			if (material.GetShaderEffect().empty())
 			{
 				LOGERROR("Terrain renderer failed to load shader effect.\n");
 				continue;
 			}
 
 			// Find or create the batch for this (shader, texture, VB, IB) combination
 			BatchElements& batch = PooledPairGet(
 				PooledMapGet(
 					PooledMapGet(
 						PooledMapGet(batches, std::make_pair(material.GetShaderEffect(), material.GetShaderDefines()), arena),
 						splat.m_Texture, arena
 					),
 					patch->m_VBBase->m_Owner, arena
 				),
 				patch->m_VBBaseIndices->m_Owner, arena
 			);
 
 			batch.first.push_back(splat.m_IndexCount);
 
 			// The splat's start is relative to the patch's chunk, so add the chunk's own offset
 			batch.second.push_back(patch->m_VBBaseIndices->m_Index + splat.m_IndexStart);
 		}
 	}
 
 	PROFILE_END("compute batches");
 
 	// Render each batch
 	for (ShaderTechniqueBatches::iterator itTech = batches.begin(); itTech != batches.end(); ++itTech)
 	{
 		// Material defines are applied on top of a copy of the caller's context defines
 		CShaderDefines defines = context;
 		defines.SetMany(itTech->first.second);
 		CShaderTechniquePtr techBase = g_Renderer.GetShaderManager().LoadEffect(
 			itTech->first.first, defines);
 
 		const int numPasses = techBase->GetNumPasses();
 		for (int pass = 0; pass < numPasses; ++pass)
 		{
 			deviceCommandContext->SetGraphicsPipelineState(
 				techBase->GetGraphicsPipelineStateDesc(pass));
 			deviceCommandContext->BeginPass();
 			Renderer::Backend::IShaderProgram* shader = techBase->GetShader(pass);
 			TerrainRenderer::PrepareShader(deviceCommandContext, shader, shadow);
 
 			const int32_t baseTexBindingSlot =
 				shader->GetBindingSlot(str_baseTex);
 			const int32_t textureTransformBindingSlot =
 				shader->GetBindingSlot(str_textureTransform);
 
 			TextureBatches& textureBatches = itTech->second;
 			for (TextureBatches::iterator itt = textureBatches.begin(); itt != textureBatches.end(); ++itt)
 			{
 				if (!itt->first->GetMaterial().GetSamplers().empty())
 				{
 					const CMaterial::SamplersVector& samplers =
 						itt->first->GetMaterial().GetSamplers();
 					// All uploads are issued before the first texture is bound
 					for(const CMaterial::TextureSampler& samp : samplers)
 						samp.Sampler->UploadBackendTextureIfNeeded(deviceCommandContext);
 					for(const CMaterial::TextureSampler& samp : samplers)
 					{
 						deviceCommandContext->SetTexture(
 							shader->GetBindingSlot(samp.Name),
 							samp.Sampler->GetBackendTexture());
 					}
 
 					itt->first->GetMaterial().GetStaticUniforms().BindUniforms(
 						deviceCommandContext, shader);
 
 					// Only elements 0 and 8 of the texture matrix are passed to the shader
 					float c = itt->first->GetTextureMatrix()[0];
 					float ms = itt->first->GetTextureMatrix()[8];
 					deviceCommandContext->SetUniform(
 						textureTransformBindingSlot, c, ms);
 				}
 				else
 				{
 					// Material without samplers: bind the error texture as base texture
 					deviceCommandContext->SetTexture(
 						baseTexBindingSlot,
 						g_Renderer.GetTextureManager().GetErrorTexture()->GetBackendTexture());
 				}
 
 				for (VertexBufferBatches::iterator itv = itt->second.begin(); itv != itt->second.end(); ++itv)
 				{
-					itv->first->UploadIfNeeded(deviceCommandContext);
+					ENSURE(!itv->first->GetBuffer()->IsDynamic());
 
 					const uint32_t stride = sizeof(SBaseVertex);
 
 					deviceCommandContext->SetVertexAttributeFormat(
 						Renderer::Backend::VertexAttributeStream::POSITION,
 						Renderer::Backend::Format::R32G32B32_SFLOAT,
 						offsetof(SBaseVertex, m_Position), stride,
 						Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 					deviceCommandContext->SetVertexAttributeFormat(
 						Renderer::Backend::VertexAttributeStream::NORMAL,
 						Renderer::Backend::Format::R32G32B32_SFLOAT,
 						offsetof(SBaseVertex, m_Normal), stride,
 						Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 					// UV0 reads the same bytes as POSITION (same offset and format)
 					deviceCommandContext->SetVertexAttributeFormat(
 						Renderer::Backend::VertexAttributeStream::UV0,
 						Renderer::Backend::Format::R32G32B32_SFLOAT,
 						offsetof(SBaseVertex, m_Position), stride,
 						Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 					deviceCommandContext->SetVertexBuffer(0, itv->first->GetBuffer(), 0);
 
 					for (IndexBufferBatches::iterator it = itv->second.begin(); it != itv->second.end(); ++it)
 					{
-						it->first->UploadIfNeeded(deviceCommandContext);
+						ENSURE(!it->first->GetBuffer()->IsDynamic());
 						deviceCommandContext->SetIndexBuffer(it->first->GetBuffer());
 
 						BatchElements& batch = it->second;
 
 						// batch.second[i] is the first index, batch.first[i] the index count
 						for (size_t i = 0; i < batch.first.size(); ++i)
 							deviceCommandContext->DrawIndexed(batch.second[i], batch.first[i], 0);
 
 						// NOTE(review): the draw call counter is bumped once per batch,
 						// although one DrawIndexed is issued per batch element above.
 						g_Renderer.m_Stats.m_DrawCalls++;
 						// NOTE(review): the literal 0 makes std::accumulate sum the u32
 						// counts in an int.
 						g_Renderer.m_Stats.m_TerrainTris += std::accumulate(batch.first.begin(), batch.first.end(), 0) / 3;
 					}
 				}
 			}
 			deviceCommandContext->EndPass();
 		}
 	}
 }
 
 /**
  * Helper structure for RenderBlends.
  * Holds the draw batches of one blend layer: all blend splats that share
  * m_Texture, grouped by vertex buffer and index buffer.
  */
 struct SBlendBatch
 {
 	// The texture pointer starts out null so that it never holds an
 	// indeterminate value before the caller assigns it.
 	SBlendBatch(Arena& arena) :
 		m_Texture(nullptr),
 		m_Batches(VertexBufferBatches::key_compare(), VertexBufferBatches::allocator_type(arena))
 	{
 	}
 
 	// Texture shared by every splat in this layer
 	CTerrainTextureEntry* m_Texture;
 	// Technique used to draw the layer; left default-constructed unless assigned
 	CShaderTechniquePtr m_ShaderTech;
 	// Arena-allocated batches, grouped by vertex buffer then index buffer
 	VertexBufferBatches m_Batches;
 };
 
 /**
  * Helper structure for RenderBlends.
  * Bundles the blend vertex/index chunks of one patch with an arena-allocated
  * copy of that patch's blend splats; RenderBlends consumes the copy from the
  * back like a stack.
  */
 struct SBlendStackItem
 {
 	// v and i are stored as given; the splats in s are copied into arena memory
 	SBlendStackItem(CVertexBuffer::VBChunk* v, CVertexBuffer::VBChunk* i,
 			const std::vector<CPatchRData::SSplat>& s, Arena& arena) :
 		vertices(v), indices(i), splats(s.begin(), s.end(), SplatStack::allocator_type(arena))
 	{
 	}
 
 	using SplatStack = std::vector<CPatchRData::SSplat, ProxyAllocator<CPatchRData::SSplat, Arena>>;
 	// Chunk holding the patch's blend vertices
 	CVertexBuffer::VBChunk* vertices;
 	// Chunk holding the patch's blend indices
 	CVertexBuffer::VBChunk* indices;
 	// Splats that still have to be assigned to a batch
 	SplatStack splats;
 };
 
 // Draw the blend splats of the given patches with alpha blending enabled.
 // The per-patch splat lists are first merged into a sequence of layers
 // (SBlendBatch), each holding splats of a single texture, while keeping the
 // order of splats within each patch; consecutive layers sharing a shader
 // technique are then drawn together, once per pass of that technique.
 //   deviceCommandContext  receives all state changes and draw calls
 //   patches               patches whose m_BlendSplats are drawn
 //   context               shader defines; BLEND=1 is added to a copy of them
 //   shadow                forwarded to TerrainRenderer::PrepareShader
 void CPatchRData::RenderBlends(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const CShaderDefines& context, ShadowMap* shadow)
 {
 	PROFILE3("render terrain blends");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain blends");
 
 	// All batching containers below allocate from this function-local arena
 	Arena arena;
 
 	using BatchesStack = std::vector<SBlendBatch, ProxyAllocator<SBlendBatch, Arena>>;
 	BatchesStack batches((BatchesStack::allocator_type(arena)));
 
 	CShaderDefines contextBlend = context;
 	contextBlend.Add(str_BLEND, str_1);
 
  	PROFILE_START("compute batches");
 
  	// Reserve an arbitrary size that's probably big enough in most cases,
  	// to avoid heavy reallocations
  	batches.reserve(256);
 
 	using BlendStacks = std::vector<SBlendStackItem, ProxyAllocator<SBlendStackItem, Arena>>;
 	BlendStacks blendStacks((BlendStacks::allocator_type(arena)));
 	blendStacks.reserve(patches.size());
 
 	// Extract all the blend splats from each patch
  	for (size_t i = 0; i < patches.size(); ++i)
  	{
  		CPatchRData* patch = patches[i];
  		if (!patch->m_BlendSplats.empty())
  		{
 
  			blendStacks.push_back(SBlendStackItem(patch->m_VBBlends.Get(), patch->m_VBBlendIndices.Get(), patch->m_BlendSplats, arena));
  			// Reverse the splats so the first to be rendered is at the back of the list
  			std::reverse(blendStacks.back().splats.begin(), blendStacks.back().splats.end());
  		}
  	}
 
  	// Rearrange the collection of splats to be grouped by texture, preserving
  	// order of splats within each patch:
  	// (This is exactly the same algorithm used in CPatchRData::BuildBlends,
  	// but applied to patch-sized splats rather than to tile-sized splats;
  	// see that function for comments on the algorithm.)
 	while (true)
 	{
 		// Step 1: move every stack-top splat that uses the newest layer's
 		// texture into that layer
 		if (!batches.empty())
 		{
 			CTerrainTextureEntry* tex = batches.back().m_Texture;
 
 			for (size_t k = 0; k < blendStacks.size(); ++k)
 			{
 				SBlendStackItem::SplatStack& splats = blendStacks[k].splats;
 				if (!splats.empty() && splats.back().m_Texture == tex)
 				{
 					CVertexBuffer::VBChunk* vertices = blendStacks[k].vertices;
 					CVertexBuffer::VBChunk* indices = blendStacks[k].indices;
 
 					BatchElements& batch = PooledPairGet(PooledMapGet(batches.back().m_Batches, vertices->m_Owner, arena), indices->m_Owner, arena);
 					batch.first.push_back(splats.back().m_IndexCount);
 
 		 			batch.second.push_back(indices->m_Index + splats.back().m_IndexStart);
 
 					splats.pop_back();
 				}
 			}
 		}
 
 		// Step 2: the next layer uses the top texture of the stack with the
 		// most remaining splats
 		CTerrainTextureEntry* bestTex = NULL;
 		size_t bestStackSize = 0;
 
 		for (size_t k = 0; k < blendStacks.size(); ++k)
 		{
 			SBlendStackItem::SplatStack& splats = blendStacks[k].splats;
 			if (splats.size() > bestStackSize)
 			{
 				bestStackSize = splats.size();
 				bestTex = splats.back().m_Texture;
 			}
 		}
 
 		// Every stack is empty: all splats have been assigned to a layer
 		if (bestStackSize == 0)
 			break;
 
 		SBlendBatch layer(arena);
 		layer.m_Texture = bestTex;
 		// NOTE(review): m_ShaderTech is only assigned for textures that have
 		// samplers; the render loop below calls techBase->GetNumPasses()
 		// unconditionally. Presumably an unassigned CShaderTechniquePtr is null,
 		// which would make that call a null dereference - confirm.
 		if (!bestTex->GetMaterial().GetSamplers().empty())
 		{
 			CShaderDefines defines = contextBlend;
 			defines.SetMany(bestTex->GetMaterial().GetShaderDefines());
 			layer.m_ShaderTech = g_Renderer.GetShaderManager().LoadEffect(
 				bestTex->GetMaterial().GetShaderEffect(), defines);
 		}
 		batches.push_back(layer);
 	}
 
 	PROFILE_END("compute batches");
 
 	// lastVB and previousShader persist across layers, passes and techniques so
 	// that the vertex layout is only rebound when one of them changes
 	CVertexBuffer* lastVB = nullptr;
 	Renderer::Backend::IShaderProgram* previousShader = nullptr;
 	// [itTechBegin, itTechEnd) is the current run of consecutive layers sharing one technique
 	for (BatchesStack::iterator itTechBegin = batches.begin(), itTechEnd = batches.begin(); itTechBegin != batches.end(); itTechBegin = itTechEnd)
 	{
 		while (itTechEnd != batches.end() && itTechEnd->m_ShaderTech == itTechBegin->m_ShaderTech)
 			++itTechEnd;
 
 		const CShaderTechniquePtr& techBase = itTechBegin->m_ShaderTech;
 		const int numPasses = techBase->GetNumPasses();
 		for (int pass = 0; pass < numPasses; ++pass)
 		{
 			// Start from the technique's pipeline state and force alpha blending
 			// (SRC_ALPHA / ONE_MINUS_SRC_ALPHA, ADD) on top of it
 			Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 				techBase->GetGraphicsPipelineStateDesc(pass);
 			pipelineStateDesc.blendState.enabled = true;
 			pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::SRC_ALPHA;
 			pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 			pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 				Renderer::Backend::BlendOp::ADD;
 			deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 			deviceCommandContext->BeginPass();
 
 			Renderer::Backend::IShaderProgram* shader = techBase->GetShader(pass);
 			TerrainRenderer::PrepareShader(deviceCommandContext, shader, shadow);
 
 			// Last blend texture bound in this pass, to skip redundant binds
 			Renderer::Backend::ITexture* lastBlendTex = nullptr;
 
 			const int32_t baseTexBindingSlot =
 				shader->GetBindingSlot(str_baseTex);
 			const int32_t blendTexBindingSlot =
 				shader->GetBindingSlot(str_blendTex);
 			const int32_t textureTransformBindingSlot =
 				shader->GetBindingSlot(str_textureTransform);
 
 			for (BatchesStack::iterator itt = itTechBegin; itt != itTechEnd; ++itt)
 			{
 				// Layers whose material has no samplers are not drawn
 				if (itt->m_Texture->GetMaterial().GetSamplers().empty())
 					continue;
 
 				// NOTE(review): m_Texture has already been dereferenced by the
 				// check above, so this null test cannot protect anything and the
 				// else branch can only be reached after that dereference.
 				if (itt->m_Texture)
 				{
 					const CMaterial::SamplersVector& samplers = itt->m_Texture->GetMaterial().GetSamplers();
 					for (const CMaterial::TextureSampler& samp : samplers)
 						samp.Sampler->UploadBackendTextureIfNeeded(deviceCommandContext);
 					for (const CMaterial::TextureSampler& samp : samplers)
 					{
 						deviceCommandContext->SetTexture(
 							shader->GetBindingSlot(samp.Name),
 							samp.Sampler->GetBackendTexture());
 					}
 
 					Renderer::Backend::ITexture* currentBlendTex = itt->m_Texture->m_TerrainAlpha->second.m_CompositeAlphaMap.get();
 					if (currentBlendTex != lastBlendTex)
 					{
 						deviceCommandContext->SetTexture(
 							blendTexBindingSlot, currentBlendTex);
 						lastBlendTex = currentBlendTex;
 					}
 
 					itt->m_Texture->GetMaterial().GetStaticUniforms().BindUniforms(deviceCommandContext, shader);
 
 					// Only elements 0 and 8 of the texture matrix are passed to the shader
 					float c = itt->m_Texture->GetTextureMatrix()[0];
 					float ms = itt->m_Texture->GetTextureMatrix()[8];
 					deviceCommandContext->SetUniform(
 						textureTransformBindingSlot, c, ms);
 				}
 				else
 				{
 					deviceCommandContext->SetTexture(
 						baseTexBindingSlot, g_Renderer.GetTextureManager().GetErrorTexture()->GetBackendTexture());
 				}
 
 				for (VertexBufferBatches::iterator itv = itt->m_Batches.begin(); itv != itt->m_Batches.end(); ++itv)
 				{
 					// Rebind the VB only if it changed since the last batch
 					if (itv->first != lastVB || shader != previousShader)
 					{
 						lastVB = itv->first;
 						previousShader = shader;
 
-						itv->first->UploadIfNeeded(deviceCommandContext);
+						ENSURE(!itv->first->GetBuffer()->IsDynamic());
 
 						const uint32_t stride = sizeof(SBlendVertex);
 
 						deviceCommandContext->SetVertexAttributeFormat(
 							Renderer::Backend::VertexAttributeStream::POSITION,
 							Renderer::Backend::Format::R32G32B32_SFLOAT,
 							offsetof(SBlendVertex, m_Position), stride,
 							Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 						deviceCommandContext->SetVertexAttributeFormat(
 							Renderer::Backend::VertexAttributeStream::NORMAL,
 							Renderer::Backend::Format::R32G32B32_SFLOAT,
 							offsetof(SBlendVertex, m_Normal), stride,
 							Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 						// UV0 reads the same bytes as POSITION (same offset and format)
 						deviceCommandContext->SetVertexAttributeFormat(
 							Renderer::Backend::VertexAttributeStream::UV0,
 							Renderer::Backend::Format::R32G32B32_SFLOAT,
 							offsetof(SBlendVertex, m_Position), stride,
 							Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 						// UV1 carries the two alphamap coordinates
 						deviceCommandContext->SetVertexAttributeFormat(
 							Renderer::Backend::VertexAttributeStream::UV1,
 							Renderer::Backend::Format::R32G32_SFLOAT,
 							offsetof(SBlendVertex, m_AlphaUVs), stride,
 							Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 						deviceCommandContext->SetVertexBuffer(0, itv->first->GetBuffer(), 0);
 					}
 
 					for (IndexBufferBatches::iterator it = itv->second.begin(); it != itv->second.end(); ++it)
 					{
-						it->first->UploadIfNeeded(deviceCommandContext);
+						ENSURE(!it->first->GetBuffer()->IsDynamic());
 						deviceCommandContext->SetIndexBuffer(it->first->GetBuffer());
 
 						BatchElements& batch = it->second;
 
 						// batch.second[i] is the first index, batch.first[i] the index count
 						for (size_t i = 0; i < batch.first.size(); ++i)
 							deviceCommandContext->DrawIndexed(batch.second[i], batch.first[i], 0);
 
 						// NOTE(review): both counters are bumped once per batch,
 						// although one DrawIndexed is issued per batch element above.
 						g_Renderer.m_Stats.m_DrawCalls++;
 						g_Renderer.m_Stats.m_BlendSplats++;
 						g_Renderer.m_Stats.m_TerrainTris += std::accumulate(batch.first.begin(), batch.first.end(), 0) / 3;
 					}
 				}
 			}
 			deviceCommandContext->EndPass();
 		}
 	}
 }
 
 // Draw the complete base geometry of the given patches, binding only vertex
 // attributes and buffers: no shader, pipeline state or texture is set here.
 // Each patch contributes its whole base index chunk, grouped by vertex buffer
 // and index buffer.
 //   deviceCommandContext    receives the attribute setup and draw calls
 //   patches                 patches to draw
 //   bindPositionAsTexCoord  when true, UV0 is bound to the position data as well
 void CPatchRData::RenderStreams(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const bool bindPositionAsTexCoord)
 {
 	PROFILE3("render terrain streams");
 
 	// Each batch has a list of index counts, and a list of pointers-to-first-indexes
 	using StreamBatchElements = std::pair<std::vector<u32>, std::vector<u32>>;
 
 	// Group batches by index buffer
 	using StreamIndexBufferBatches = std::map<CVertexBuffer*, StreamBatchElements>;
 
 	// Group batches by vertex buffer
 	using StreamVertexBufferBatches = std::map<CVertexBuffer*, StreamIndexBufferBatches>;
 
 	StreamVertexBufferBatches batches;
 
  	PROFILE_START("compute batches");
 
  	// Collect all the patches into their appropriate batches
 	for (const CPatchRData* patch : patches)
 	{
 		StreamBatchElements& batch = batches[patch->m_VBBase->m_Owner][patch->m_VBBaseIndices->m_Owner];
 
 		// The whole index chunk of the patch is drawn, not individual splats
 		batch.first.push_back(patch->m_VBBaseIndices->m_Count);
 
  		batch.second.push_back(patch->m_VBBaseIndices->m_Index);
  	}
 
  	PROFILE_END("compute batches");
 
 	const uint32_t stride = sizeof(SBaseVertex);
 
 	// The attribute layout is identical for every batch, so it is set once up front
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		offsetof(SBaseVertex, m_Position), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	if (bindPositionAsTexCoord)
 	{
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV0,
 			Renderer::Backend::Format::R32G32B32_SFLOAT,
 			offsetof(SBaseVertex, m_Position), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	}
 
  	// Render each batch
 	for (const std::pair<CVertexBuffer* const, StreamIndexBufferBatches>& streamBatch : batches)
 	{
-		streamBatch.first->UploadIfNeeded(deviceCommandContext);
+		ENSURE(!streamBatch.first->GetBuffer()->IsDynamic());
 
 		deviceCommandContext->SetVertexBuffer(0, streamBatch.first->GetBuffer(), 0);
 
 		for (const std::pair<CVertexBuffer* const, StreamBatchElements>& batchIndexBuffer : streamBatch.second)
 		{
-			batchIndexBuffer.first->UploadIfNeeded(deviceCommandContext);
+			ENSURE(!batchIndexBuffer.first->GetBuffer()->IsDynamic());
 			deviceCommandContext->SetIndexBuffer(batchIndexBuffer.first->GetBuffer());
 
 			const StreamBatchElements& batch = batchIndexBuffer.second;
 
 			// batch.second[i] is the first index, batch.first[i] the index count
 			for (size_t i = 0; i < batch.first.size(); ++i)
 				deviceCommandContext->DrawIndexed(batch.second[i], batch.first[i], 0);
 
 			// NOTE(review): the draw call counter is bumped once per batch,
 			// although one DrawIndexed is issued per batch element above.
 			g_Renderer.m_Stats.m_DrawCalls++;
 			g_Renderer.m_Stats.m_TerrainTris += std::accumulate(batch.first.begin(), batch.first.end(), 0) / 3;
 		}
 	}
 }
 
 void CPatchRData::RenderOutline()
 {
 	CTerrain* terrain = m_Patch->m_Parent;
 	ssize_t gx = m_Patch->m_X * PATCH_SIZE;
 	ssize_t gz = m_Patch->m_Z * PATCH_SIZE;
 
 	CVector3D pos;
 	std::vector<CVector3D> line;
 	for (ssize_t i = 0, j = 0; i <= PATCH_SIZE; ++i)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 	for (ssize_t i = PATCH_SIZE, j = 1; j <= PATCH_SIZE; ++j)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 	for (ssize_t i = PATCH_SIZE-1, j = PATCH_SIZE; i >= 0; --i)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 	for (ssize_t i = 0, j = PATCH_SIZE-1; j >= 0; --j)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 
 	g_Renderer.GetDebugRenderer().DrawLine(line, CColor(0.0f, 0.0f, 1.0f, 1.0f), 0.1f);
 }
 
 // Draw the side geometry (m_VBSides) of the given patches as non-indexed
 // triangles. Only the position attribute and the vertex buffer are bound
 // here; no shader or pipeline state is set by this function.
 //   deviceCommandContext  receives the attribute setup and draw calls
 //   patches               patches to draw; those without side data are skipped
 void CPatchRData::RenderSides(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches)
 {
 	PROFILE3("render terrain sides");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain sides");
 
 	if (patches.empty())
 		return;
 
 	const uint32_t stride = sizeof(SSideVertex);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		offsetof(SSideVertex, m_Position), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 	// Vertex buffer bound by the previous iteration, to skip redundant rebinds
 	CVertexBuffer* lastVB = nullptr;
 	for (CPatchRData* patch : patches)
 	{
 		// Patches must not have pending updates when they are rendered
 		ENSURE(patch->m_UpdateFlags == 0);
 		if (!patch->m_VBSides)
 			continue;
 		if (lastVB != patch->m_VBSides->m_Owner)
 		{
 			lastVB = patch->m_VBSides->m_Owner;
-			patch->m_VBSides->m_Owner->UploadIfNeeded(deviceCommandContext);
-
+			ENSURE(!lastVB->GetBuffer()->IsDynamic());
 			deviceCommandContext->SetVertexBuffer(0, patch->m_VBSides->m_Owner->GetBuffer(), 0);
 		}
 
 		// m_Index is the first vertex of the patch's chunk, m_Count its vertex count
 		deviceCommandContext->Draw(patch->m_VBSides->m_Index, patch->m_VBSides->m_Count);
 
 		// bump stats
 		g_Renderer.m_Stats.m_DrawCalls++;
 		g_Renderer.m_Stats.m_TerrainTris += patch->m_VBSides->m_Count / 3;
 	}
 }
 
 // Print the Priority value of every tile of this patch at the tile's
 // on-screen position, using the camera of the current game view.
 void CPatchRData::RenderPriorities(CTextRenderer& textRenderer)
 {
 	CTerrain* terrain = m_Patch->m_Parent;
 	const CCamera& camera = *(g_Game->GetView()->GetCamera());
 
 	// Grid coordinates of the patch's first tile
 	const ssize_t originX = m_Patch->m_X * PATCH_SIZE;
 	const ssize_t originZ = m_Patch->m_Z * PATCH_SIZE;
 
 	for (ssize_t tileZ = 0; tileZ < PATCH_SIZE; ++tileZ)
 	{
 		for (ssize_t tileX = 0; tileX < PATCH_SIZE; ++tileX)
 		{
 			CVector3D labelPos;
 			terrain->CalcPosition(originX + tileX, originZ + tileZ, labelPos);
 
 			// Shift the label from the tile corner a bit towards the tile center
 			labelPos.X += TERRAIN_TILE_SIZE/4.f;
 			labelPos.Z += TERRAIN_TILE_SIZE/4.f;
 
 			float screenX, screenY;
 			camera.GetScreenCoordinates(labelPos, screenX, screenY);
 
 			textRenderer.PrintfAt(screenX, screenY, L"%d", m_Patch->m_MiniPatches[tileZ][tileX].Priority);
 		}
 	}
 }
 
 //
 // Water build and rendering
 //
 
 // Build vertex buffer for water vertices over our patch
 void CPatchRData::BuildWater()
 {
 	PROFILE3("build water");
 
 	// Number of vertices in each direction in each patch
 	ENSURE(PATCH_SIZE % water_cell_size == 0);
 
 	m_VBWater.Reset();
 	m_VBWaterIndices.Reset();
 	m_VBWaterShore.Reset();
 	m_VBWaterIndicesShore.Reset();
 
 	m_WaterBounds.SetEmpty();
 
 	// We need to use this to access the water manager or we may not have the
 	// actual values but some compiled-in defaults
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*m_Simulation, SYSTEM_ENTITY);
 	if (!cmpWaterManager)
 		return;
 
 	// Build data for water
 	std::vector<SWaterVertex> water_vertex_data;
 	std::vector<u16> water_indices;
 	u16 water_index_map[PATCH_SIZE+1][PATCH_SIZE+1];
 	memset(water_index_map, 0xFF, sizeof(water_index_map));
 
 	// Build data for shore
 	std::vector<SWaterVertex> water_vertex_data_shore;
 	std::vector<u16> water_indices_shore;
 	u16 water_shore_index_map[PATCH_SIZE+1][PATCH_SIZE+1];
 	memset(water_shore_index_map, 0xFF, sizeof(water_shore_index_map));
 
 	const WaterManager& waterManager = g_Renderer.GetSceneRenderer().GetWaterManager();
 
 	CPatch* patch = m_Patch;
 	CTerrain* terrain = patch->m_Parent;
 
 	ssize_t mapSize = terrain->GetVerticesPerSide();
 
 	// Top-left coordinates of our patch.
 	ssize_t px = m_Patch->m_X * PATCH_SIZE;
 	ssize_t pz = m_Patch->m_Z * PATCH_SIZE;
 
 	// To whoever implements different water heights, this is a TODO: water height)
 	float waterHeight = cmpWaterManager->GetExactWaterLevel(0.0f,0.0f);
 
 	// The 4 points making a water tile.
 	int moves[4][2] = {
 		{0, 0},
 		{water_cell_size, 0},
 		{0, water_cell_size},
 		{water_cell_size, water_cell_size}
 	};
 	// Where to look for when checking for water for shore tiles.
 	int check[10][2] = {
 		{0, 0},
 		{water_cell_size, 0},
 		{water_cell_size*2, 0},
 		{0, water_cell_size},
 		{0, water_cell_size*2},
 		{water_cell_size, water_cell_size},
 		{water_cell_size*2, water_cell_size*2},
 		{-water_cell_size, 0},
 		{0, -water_cell_size},
 		{-water_cell_size, -water_cell_size}
 	};
 
 	// build vertices, uv, and shader varying
 	for (ssize_t z = 0; z < PATCH_SIZE; z += water_cell_size)
 	{
 		for (ssize_t x = 0; x < PATCH_SIZE; x += water_cell_size)
 		{
 			// Check that this tile is close to water
 			bool nearWater = false;
 			for (size_t test = 0; test < 10; ++test)
 				if (terrain->GetVertexGroundLevel(x + px + check[test][0], z + pz + check[test][1]) < waterHeight)
 					nearWater = true;
 			if (!nearWater)
 				continue;
 
 			// This is actually lying and I should call CcmpTerrain
 			/*if (!terrain->IsOnMap(x+x1, z+z1)
 			 && !terrain->IsOnMap(x+x1, z+z1 + water_cell_size)
 			 && !terrain->IsOnMap(x+x1 + water_cell_size, z+z1)
 			 && !terrain->IsOnMap(x+x1 + water_cell_size, z+z1 + water_cell_size))
 			 continue;*/
 
 			for (int i = 0; i < 4; ++i)
 			{
 				if (water_index_map[z+moves[i][1]][x+moves[i][0]] != 0xFFFF)
 					continue;
 
 				ssize_t xx = x + px + moves[i][0];
 				ssize_t zz = z + pz + moves[i][1];
 
 				SWaterVertex vertex;
 				terrain->CalcPosition(xx,zz, vertex.m_Position);
 				float depth = waterHeight - vertex.m_Position.Y;
 
 				vertex.m_Position.Y = waterHeight;
 
 				m_WaterBounds += vertex.m_Position;
 
 				vertex.m_WaterData = CVector2D(waterManager.m_WindStrength[xx + zz*mapSize], depth);
 
 				water_index_map[z+moves[i][1]][x+moves[i][0]] = static_cast<u16>(water_vertex_data.size());
 				water_vertex_data.push_back(vertex);
 			}
 			water_indices.push_back(water_index_map[z + moves[2][1]][x + moves[2][0]]);
 			water_indices.push_back(water_index_map[z + moves[0][1]][x + moves[0][0]]);
 			water_indices.push_back(water_index_map[z + moves[1][1]][x + moves[1][0]]);
 			water_indices.push_back(water_index_map[z + moves[1][1]][x + moves[1][0]]);
 			water_indices.push_back(water_index_map[z + moves[3][1]][x + moves[3][0]]);
 			water_indices.push_back(water_index_map[z + moves[2][1]][x + moves[2][0]]);
 
 			// Check id this tile is partly over land.
 			// If so add a square over the terrain. This is necessary to render waves that go on shore.
 			if (terrain->GetVertexGroundLevel(x+px, z+pz) < waterHeight &&
 				terrain->GetVertexGroundLevel(x+px + water_cell_size, z+pz) < waterHeight &&
 				terrain->GetVertexGroundLevel(x+px, z+pz+water_cell_size) < waterHeight &&
 				terrain->GetVertexGroundLevel(x+px + water_cell_size, z+pz+water_cell_size) < waterHeight)
 				continue;
 
 			for (int i = 0; i < 4; ++i)
 			{
 				if (water_shore_index_map[z+moves[i][1]][x+moves[i][0]] != 0xFFFF)
 					continue;
 				ssize_t xx = x + px + moves[i][0];
 				ssize_t zz = z + pz + moves[i][1];
 
 				SWaterVertex vertex;
 				terrain->CalcPosition(xx,zz, vertex.m_Position);
 
 				vertex.m_Position.Y += 0.02f;
 				m_WaterBounds += vertex.m_Position;
 
 				vertex.m_WaterData = CVector2D(0.0f, -5.0f);
 
 				water_shore_index_map[z+moves[i][1]][x+moves[i][0]] = static_cast<u16>(water_vertex_data_shore.size());
 				water_vertex_data_shore.push_back(vertex);
 			}
 			if (terrain->GetTriangulationDir(x + px, z + pz))
 			{
 				water_indices_shore.push_back(water_shore_index_map[z + moves[2][1]][x + moves[2][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[0][1]][x + moves[0][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[1][1]][x + moves[1][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[1][1]][x + moves[1][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[3][1]][x + moves[3][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[2][1]][x + moves[2][0]]);
 			}
 			else
 			{
 				water_indices_shore.push_back(water_shore_index_map[z + moves[3][1]][x + moves[3][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[2][1]][x + moves[2][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[0][1]][x + moves[0][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[3][1]][x + moves[3][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[0][1]][x + moves[0][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[1][1]][x + moves[1][0]]);
 			}
 		}
 	}
 
 	// No vertex buffers if no data generated
 	if (!water_indices.empty())
 	{
 		m_VBWater = g_VBMan.AllocateChunk(
 			sizeof(SWaterVertex), water_vertex_data.size(),
 			Renderer::Backend::IBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWater->m_Owner->UpdateChunkVertices(m_VBWater.Get(), &water_vertex_data[0]);
 
 		m_VBWaterIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), water_indices.size(),
 			Renderer::Backend::IBuffer::Type::INDEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWaterIndices->m_Owner->UpdateChunkVertices(m_VBWaterIndices.Get(), &water_indices[0]);
 	}
 
 	if (!water_indices_shore.empty())
 	{
 		m_VBWaterShore = g_VBMan.AllocateChunk(
 			sizeof(SWaterVertex), water_vertex_data_shore.size(),
 			Renderer::Backend::IBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWaterShore->m_Owner->UpdateChunkVertices(m_VBWaterShore.Get(), &water_vertex_data_shore[0]);
 
 		// Construct indices buffer
 		m_VBWaterIndicesShore = g_VBMan.AllocateChunk(
 			sizeof(u16), water_indices_shore.size(),
 			Renderer::Backend::IBuffer::Type::INDEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWaterIndicesShore->m_Owner->UpdateChunkVertices(m_VBWaterIndicesShore.Get(), &water_indices_shore[0]);
 	}
 }
 
 void CPatchRData::RenderWaterSurface(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const bool bindWaterData)
 {
 	ASSERT(m_UpdateFlags == 0);
 
 	if (!m_VBWater)
 		return;
 
-	m_VBWater->m_Owner->UploadIfNeeded(deviceCommandContext);
-	m_VBWaterIndices->m_Owner->UploadIfNeeded(deviceCommandContext);
+	ENSURE(!m_VBWater->m_Owner->GetBuffer()->IsDynamic());
+	ENSURE(!m_VBWaterIndices->m_Owner->GetBuffer()->IsDynamic());
 
 	const uint32_t stride = sizeof(SWaterVertex);
 	const uint32_t firstVertexOffset = m_VBWater->m_Index * stride;
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		offsetof(SWaterVertex, m_Position), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	if (bindWaterData)
 	{
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV1,
 			Renderer::Backend::Format::R32G32_SFLOAT,
 			offsetof(SWaterVertex, m_WaterData), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	}
 
 	deviceCommandContext->SetVertexBuffer(
 		0, m_VBWater->m_Owner->GetBuffer(), firstVertexOffset);
 	deviceCommandContext->SetIndexBuffer(m_VBWaterIndices->m_Owner->GetBuffer());
 
 	deviceCommandContext->DrawIndexed(m_VBWaterIndices->m_Index, m_VBWaterIndices->m_Count, 0);
 
 	g_Renderer.m_Stats.m_DrawCalls++;
 	g_Renderer.m_Stats.m_WaterTris += m_VBWaterIndices->m_Count / 3;
 }
 
 void CPatchRData::RenderWaterShore(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	ASSERT(m_UpdateFlags == 0);
 
 	if (!m_VBWaterShore)
 		return;
 
-	m_VBWaterShore->m_Owner->UploadIfNeeded(deviceCommandContext);
-	m_VBWaterIndicesShore->m_Owner->UploadIfNeeded(deviceCommandContext);
+	ENSURE(!m_VBWaterShore->m_Owner->GetBuffer()->IsDynamic());
+	ENSURE(!m_VBWaterIndicesShore->m_Owner->GetBuffer()->IsDynamic());
 
 	const uint32_t stride = sizeof(SWaterVertex);
 	const uint32_t firstVertexOffset = m_VBWaterShore->m_Index * stride;
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		offsetof(SWaterVertex, m_Position), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::UV1,
 		Renderer::Backend::Format::R32G32_SFLOAT,
 		offsetof(SWaterVertex, m_WaterData), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 	deviceCommandContext->SetVertexBuffer(
 		0, m_VBWaterShore->m_Owner->GetBuffer(), firstVertexOffset);
 	deviceCommandContext->SetIndexBuffer(m_VBWaterIndicesShore->m_Owner->GetBuffer());
 
 	deviceCommandContext->DrawIndexed(m_VBWaterIndicesShore->m_Index, m_VBWaterIndicesShore->m_Count, 0);
 
 	g_Renderer.m_Stats.m_DrawCalls++;
 	g_Renderer.m_Stats.m_WaterTris += m_VBWaterIndicesShore->m_Count / 3;
 }
Index: ps/trunk/source/renderer/SceneRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/SceneRenderer.cpp	(revision 27181)
+++ ps/trunk/source/renderer/SceneRenderer.cpp	(revision 27182)
@@ -1,1204 +1,1216 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "SceneRenderer.h"
 
 #include "graphics/Camera.h"
 #include "graphics/Decal.h"
 #include "graphics/GameView.h"
 #include "graphics/LightEnv.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/MaterialManager.h"
 #include "graphics/MiniMapTexture.h"
 #include "graphics/Model.h"
 #include "graphics/ModelDef.h"
 #include "graphics/ParticleManager.h"
 #include "graphics/Patch.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/TerritoryTexture.h"
 #include "graphics/Terrain.h"
 #include "graphics/Texture.h"
 #include "graphics/TextureManager.h"
 #include "maths/Matrix3D.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/ConfigDB.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/backend/IDevice.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/HWLightingModelRenderer.h"
 #include "renderer/InstancingModelRenderer.h"
 #include "renderer/ModelRenderer.h"
 #include "renderer/OverlayRenderer.h"
 #include "renderer/ParticleRenderer.h"
 #include "renderer/PostprocManager.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/RenderModifiers.h"
 #include "renderer/ShadowMap.h"
 #include "renderer/SilhouetteRenderer.h"
 #include "renderer/SkyManager.h"
 #include "renderer/TerrainOverlay.h"
 #include "renderer/TerrainRenderer.h"
 #include "renderer/WaterManager.h"
 
 #include <algorithm>
 
 struct SScreenRect
 {
 	int x1, y1, x2, y2;
 };
 
 /**
  * Struct CSceneRendererInternals: Truly hide data that is supposed to be hidden
  * in this structure so it won't even appear in header files.
  */
 class CSceneRenderer::Internals
 {
 	NONCOPYABLE(Internals);
 public:
 	Internals() = default;
 	~Internals() = default;
 
 	/// Water manager
 	WaterManager waterManager;
 
 	/// Sky manager
 	SkyManager skyManager;
 
 	/// Terrain renderer
 	TerrainRenderer terrainRenderer;
 
 	/// Overlay renderer
 	OverlayRenderer overlayRenderer;
 
 	/// Particle manager
 	CParticleManager particleManager;
 
 	/// Particle renderer
 	ParticleRenderer particleRenderer;
 
 	/// Material manager
 	CMaterialManager materialManager;
 
 	/// Shadow map
 	ShadowMap shadow;
 
 	SilhouetteRenderer silhouetteRenderer;
 
 	/// Various model renderers
 	struct Models
 	{
 		// NOTE: The current renderer design (with ModelRenderer, ModelVertexRenderer,
 		// RenderModifier, etc) is mostly a relic of an older design that implemented
 		// the different materials and rendering modes through extensive subclassing
 		// and hooking objects together in various combinations.
 		// The new design uses the CShaderManager API to abstract away the details
 		// of rendering, and uses a data-driven approach to materials, so there are
 		// now a small number of generic subclasses instead of many specialised subclasses,
 		// but most of the old infrastructure hasn't been refactored out yet and leads to
 		// some unwanted complexity.
 
 		// Submitted models are split on two axes:
 		//  - Normal vs Transp[arent] - alpha-blended models are stored in a separate
 		//    list so we can draw them above/below the alpha-blended water plane correctly
 		//  - Skinned vs Unskinned - with hardware lighting we don't need to
 		//    duplicate mesh data per model instance (except for skinned models),
 		//    so non-skinned models get different ModelVertexRenderers
 
 		ModelRendererPtr NormalSkinned;
 		ModelRendererPtr NormalUnskinned; // == NormalSkinned if unskinned shader instancing not supported
 		ModelRendererPtr TranspSkinned;
 		ModelRendererPtr TranspUnskinned; // == TranspSkinned if unskinned shader instancing not supported
 
 		ModelVertexRendererPtr VertexRendererShader;
 		ModelVertexRendererPtr VertexInstancingShader;
 		ModelVertexRendererPtr VertexGPUSkinningShader;
 
 		LitRenderModifierPtr ModShader;
 	} Model;
 
 	CShaderDefines globalContext;
 
 	/**
 	 * Renders all non-alpha-blended models with the given context.
 	 */
 	void CallModelRenderers(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const CShaderDefines& context, int cullGroup, int flags)
 	{
 		CShaderDefines contextSkinned = context;
 		if (g_RenderingOptions.GetGPUSkinning())
 		{
 			contextSkinned.Add(str_USE_INSTANCING, str_1);
 			contextSkinned.Add(str_USE_GPU_SKINNING, str_1);
 		}
 		Model.NormalSkinned->Render(deviceCommandContext, Model.ModShader, contextSkinned, cullGroup, flags);
 
 		if (Model.NormalUnskinned != Model.NormalSkinned)
 		{
 			CShaderDefines contextUnskinned = context;
 			contextUnskinned.Add(str_USE_INSTANCING, str_1);
 			Model.NormalUnskinned->Render(deviceCommandContext, Model.ModShader, contextUnskinned, cullGroup, flags);
 		}
 	}
 
 	/**
 	 * Renders all alpha-blended models with the given context.
 	 */
 	void CallTranspModelRenderers(
 		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 		const CShaderDefines& context, int cullGroup, int flags)
 	{
 		CShaderDefines contextSkinned = context;
 		if (g_RenderingOptions.GetGPUSkinning())
 		{
 			contextSkinned.Add(str_USE_INSTANCING, str_1);
 			contextSkinned.Add(str_USE_GPU_SKINNING, str_1);
 		}
 		Model.TranspSkinned->Render(deviceCommandContext, Model.ModShader, contextSkinned, cullGroup, flags);
 
 		if (Model.TranspUnskinned != Model.TranspSkinned)
 		{
 			CShaderDefines contextUnskinned = context;
 			contextUnskinned.Add(str_USE_INSTANCING, str_1);
 			Model.TranspUnskinned->Render(deviceCommandContext, Model.ModShader, contextUnskinned, cullGroup, flags);
 		}
 	}
 };
 
 CSceneRenderer::CSceneRenderer()
 {
 	m = std::make_unique<Internals>();
 
 	m_TerrainRenderMode = SOLID;
 	m_WaterRenderMode = SOLID;
 	m_ModelRenderMode = SOLID;
 	m_OverlayRenderMode = SOLID;
 
 	m_DisplayTerrainPriorities = false;
 
 	m_LightEnv = nullptr;
 
 	m_CurrentScene = nullptr;
 }
 
 CSceneRenderer::~CSceneRenderer()
 {
 	// We no longer UnloadWaterTextures here -
 	// that is the responsibility of the module that asked for
 	// them to be loaded (i.e. CGameView).
 	m.reset();
 }
 
 void CSceneRenderer::ReloadShaders()
 {
 	m->globalContext = CShaderDefines();
 
 	Renderer::Backend::IDevice* device = g_VideoMode.GetBackendDevice();
 
 	if (g_RenderingOptions.GetShadows())
 	{
 		m->globalContext.Add(str_USE_SHADOW, str_1);
 		if (device->GetBackend() == Renderer::Backend::Backend::GL_ARB &&
 			device->GetCapabilities().ARBShadersShadow)
 		{
 			m->globalContext.Add(str_USE_FP_SHADOW, str_1);
 		}
 		if (g_RenderingOptions.GetShadowPCF())
 			m->globalContext.Add(str_USE_SHADOW_PCF, str_1);
 		const int cascadeCount = m->shadow.GetCascadeCount();
 		ENSURE(1 <= cascadeCount && cascadeCount <= 4);
 		const CStrIntern cascadeCountStr[5] = {str_0, str_1, str_2, str_3, str_4};
 		m->globalContext.Add(str_SHADOWS_CASCADE_COUNT, cascadeCountStr[cascadeCount]);
 #if !CONFIG2_GLES
 		m->globalContext.Add(str_USE_SHADOW_SAMPLER, str_1);
 #endif
 	}
 
 	m->globalContext.Add(str_RENDER_DEBUG_MODE,
 		RenderDebugModeEnum::ToString(g_RenderingOptions.GetRenderDebugMode()));
 
 	if (device->GetBackend() != Renderer::Backend::Backend::GL_ARB && g_RenderingOptions.GetFog())
 		m->globalContext.Add(str_USE_FOG, str_1);
 
 	m->Model.ModShader = LitRenderModifierPtr(new ShaderRenderModifier());
 
 	ENSURE(g_RenderingOptions.GetRenderPath() != RenderPath::FIXED);
 	m->Model.VertexRendererShader = ModelVertexRendererPtr(new ShaderModelVertexRenderer());
 	m->Model.VertexInstancingShader = ModelVertexRendererPtr(new InstancingModelRenderer(false, device->GetBackend() != Renderer::Backend::Backend::GL_ARB));
 
 	if (g_RenderingOptions.GetGPUSkinning()) // TODO: should check caps and GLSL etc too
 	{
 		m->Model.VertexGPUSkinningShader = ModelVertexRendererPtr(new InstancingModelRenderer(true, device->GetBackend() != Renderer::Backend::Backend::GL_ARB));
 		m->Model.NormalSkinned = ModelRendererPtr(new ShaderModelRenderer(m->Model.VertexGPUSkinningShader));
 		m->Model.TranspSkinned = ModelRendererPtr(new ShaderModelRenderer(m->Model.VertexGPUSkinningShader));
 	}
 	else
 	{
 		m->Model.VertexGPUSkinningShader.reset();
 		m->Model.NormalSkinned = ModelRendererPtr(new ShaderModelRenderer(m->Model.VertexRendererShader));
 		m->Model.TranspSkinned = ModelRendererPtr(new ShaderModelRenderer(m->Model.VertexRendererShader));
 	}
 
 	m->Model.NormalUnskinned = ModelRendererPtr(new ShaderModelRenderer(m->Model.VertexInstancingShader));
 	m->Model.TranspUnskinned = ModelRendererPtr(new ShaderModelRenderer(m->Model.VertexInstancingShader));
 }
 
 void CSceneRenderer::Initialize()
 {
 	// Let component renderers perform one-time initialization after graphics capabilities and
 	// the shader path have been determined.
 	m->overlayRenderer.Initialize();
 }
 
 // resize renderer view
 void CSceneRenderer::Resize(int UNUSED(width), int UNUSED(height))
 {
 	// need to recreate the shadow map object to resize the shadow texture
 	m->shadow.RecreateTexture();
 
 	m->waterManager.RecreateOrLoadTexturesIfNeeded();
 }
 
 void CSceneRenderer::BeginFrame()
 {
 	// choose model renderers for this frame
 	m->Model.ModShader->SetShadowMap(&m->shadow);
 	m->Model.ModShader->SetLightEnv(m_LightEnv);
 }
 
 void CSceneRenderer::SetSimulation(CSimulation2* simulation)
 {
 	// set current simulation context for terrain renderer
 	m->terrainRenderer.SetSimulation(simulation);
 }
 
 void CSceneRenderer::RenderShadowMap(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context)
 {
 	PROFILE3_GPU("shadow map");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render shadow map");
 
 	CShaderDefines shadowsContext = context;
 	shadowsContext.Add(str_PASS_SHADOWS, str_1);
 
 	CShaderDefines contextCast = shadowsContext;
 	contextCast.Add(str_MODE_SHADOWCAST, str_1);
 
 	m->shadow.BeginRender(deviceCommandContext);
 
 	const int cascadeCount = m->shadow.GetCascadeCount();
 	ENSURE(0 <= cascadeCount && cascadeCount <= 4);
 	for (int cascade = 0; cascade < cascadeCount; ++cascade)
 	{
 		m->shadow.PrepareCamera(deviceCommandContext, cascade);
 
 		const int cullGroup = CULL_SHADOWS_CASCADE_0 + cascade;
 		{
 			PROFILE("render patches");
 			m->terrainRenderer.RenderPatches(deviceCommandContext, cullGroup, shadowsContext);
 		}
 
 		{
 			PROFILE("render models");
 			m->CallModelRenderers(deviceCommandContext, contextCast, cullGroup, MODELFLAG_CASTSHADOWS);
 		}
 
 		{
 			PROFILE("render transparent models");
 			m->CallTranspModelRenderers(deviceCommandContext, contextCast, cullGroup, MODELFLAG_CASTSHADOWS);
 		}
 	}
 
 	m->shadow.EndRender(deviceCommandContext);
 
 	g_Renderer.SetViewport(m_ViewCamera.GetViewPort());
 }
 
 void CSceneRenderer::RenderPatches(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, int cullGroup)
 {
 	PROFILE3_GPU("patches");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render patches");
 
 	// Switch on wireframe if we need it.
 	CShaderDefines localContext = context;
 	if (m_TerrainRenderMode == WIREFRAME)
 		localContext.Add(str_MODE_WIREFRAME, str_1);
 
 	// Render all the patches, including blend pass.
 	m->terrainRenderer.RenderTerrainShader(deviceCommandContext, localContext, cullGroup,
 		g_RenderingOptions.GetShadows() ? &m->shadow : nullptr);
 
 	if (m_TerrainRenderMode == EDGED_FACES)
 	{
 		localContext.Add(str_MODE_WIREFRAME, str_1);
 		// Edged faces: need to make a second pass over the data.
 
 		// Render tiles edges.
 		m->terrainRenderer.RenderPatches(
 			deviceCommandContext, cullGroup, localContext, CColor(0.5f, 0.5f, 1.0f, 1.0f));
 
 		// Render outline of each patch.
 		m->terrainRenderer.RenderOutlines(deviceCommandContext, cullGroup);
 	}
 }
 
 void CSceneRenderer::RenderModels(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, int cullGroup)
 {
 	PROFILE3_GPU("models");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render models");
 
 	int flags = 0;
 
 	CShaderDefines localContext = context;
 
 	if (m_ModelRenderMode == WIREFRAME)
 		localContext.Add(str_MODE_WIREFRAME, str_1);
 
 	m->CallModelRenderers(deviceCommandContext, localContext, cullGroup, flags);
 
 	if (m_ModelRenderMode == EDGED_FACES)
 	{
 		localContext.Add(str_MODE_WIREFRAME_SOLID, str_1);
 		m->CallModelRenderers(deviceCommandContext, localContext, cullGroup, flags);
 	}
 }
 
 void CSceneRenderer::RenderTransparentModels(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, int cullGroup, ETransparentMode transparentMode)
 {
 	PROFILE3_GPU("transparent models");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render transparent models");
 
 	int flags = 0;
 
 	CShaderDefines contextOpaque = context;
 	contextOpaque.Add(str_ALPHABLEND_PASS_OPAQUE, str_1);
 
 	CShaderDefines contextBlend = context;
 	contextBlend.Add(str_ALPHABLEND_PASS_BLEND, str_1);
 
 	if (m_ModelRenderMode == WIREFRAME)
 	{
 		contextOpaque.Add(str_MODE_WIREFRAME, str_1);
 		contextBlend.Add(str_MODE_WIREFRAME, str_1);
 	}
 
 	if (transparentMode == TRANSPARENT || transparentMode == TRANSPARENT_OPAQUE)
 		m->CallTranspModelRenderers(deviceCommandContext, contextOpaque, cullGroup, flags);
 
 	if (transparentMode == TRANSPARENT || transparentMode == TRANSPARENT_BLEND)
 		m->CallTranspModelRenderers(deviceCommandContext, contextBlend, cullGroup, flags);
 
 	if (m_ModelRenderMode == EDGED_FACES)
 	{
 		CShaderDefines contextWireframe = contextOpaque;
 		contextWireframe.Add(str_MODE_WIREFRAME, str_1);
 
 		m->CallTranspModelRenderers(deviceCommandContext, contextWireframe, cullGroup, flags);
 	}
 }
 
 // SetObliqueFrustumClipping: change the near plane to the given clip plane (in world space)
 // Based on code from Game Programming Gems 5, from http://www.terathon.com/code/oblique.html
 // - worldPlane is a clip plane in world space (worldPlane.Dot(v) >= 0 for any vector v passing the clipping test)
 void CSceneRenderer::SetObliqueFrustumClipping(CCamera& camera, const CVector4D& worldPlane) const
 {
 	// First, we'll convert the given clip plane to camera space, then we'll
 	// Get the view matrix and normal matrix (top 3x3 part of view matrix)
 	CMatrix3D normalMatrix = camera.GetOrientation().GetTranspose();
 	CVector4D camPlane = normalMatrix.Transform(worldPlane);
 
 	CMatrix3D matrix = camera.GetProjection();
 
 	// Calculate the clip-space corner point opposite the clipping plane
 	// as (sgn(camPlane.x), sgn(camPlane.y), 1, 1) and
 	// transform it into camera space by multiplying it
 	// by the inverse of the projection matrix
 
 	CVector4D q;
 	q.X = (Sign(camPlane.X) - matrix[8] / matrix[11]) / matrix[0];
 	q.Y = (Sign(camPlane.Y) - matrix[9] / matrix[11]) / matrix[5];
 	q.Z = 1.0f / matrix[11];
 	q.W = (1.0f - matrix[10] / matrix[11]) / matrix[14];
 
 	// Calculate the scaled plane vector
 	CVector4D c = camPlane * (2.0f * matrix[11] / camPlane.Dot(q));
 
 	// Replace the third row of the projection matrix
 	matrix[2] = c.X;
 	matrix[6] = c.Y;
 	matrix[10] = c.Z - matrix[11];
 	matrix[14] = c.W;
 
 	// Load it back into the camera
 	camera.SetProjection(matrix);
 }
 
 void CSceneRenderer::ComputeReflectionCamera(CCamera& camera, const CBoundingBoxAligned& scissor) const
 {
 	WaterManager& wm = m->waterManager;
 
 	CMatrix3D projection;
 	if (m_ViewCamera.GetProjectionType() == CCamera::ProjectionType::PERSPECTIVE)
 	{
 		const float aspectRatio = 1.0f;
 		// Expand fov slightly since ripples can reflect parts of the scene that
 		// are slightly outside the normal camera view, and we want to avoid any
 		// noticeable edge-filtering artifacts
 		projection.SetPerspective(m_ViewCamera.GetFOV() * 1.05f, aspectRatio, m_ViewCamera.GetNearPlane(), m_ViewCamera.GetFarPlane());
 	}
 	else
 		projection = m_ViewCamera.GetProjection();
 
 	camera = m_ViewCamera;
 
 	// Temporarily change the camera to one that is reflected.
 	// Also, for texturing purposes, make it render to a view port the size of the
 	// water texture, stretch the image according to our aspect ratio so it covers
 	// the whole screen despite being rendered into a square, and cover slightly more
 	// of the view so we can see wavy reflections of slightly off-screen objects.
 	camera.m_Orientation.Scale(1, -1, 1);
 	camera.m_Orientation.Translate(0, 2 * wm.m_WaterHeight, 0);
 	camera.UpdateFrustum(scissor);
 	// Clip slightly above the water to improve reflections of objects on the water
 	// when the reflections are distorted.
 	camera.ClipFrustum(CVector4D(0, 1, 0, -wm.m_WaterHeight + 2.0f));
 
 	SViewPort vp;
 	vp.m_Height = wm.m_RefTextureSize;
 	vp.m_Width = wm.m_RefTextureSize;
 	vp.m_X = 0;
 	vp.m_Y = 0;
 	camera.SetViewPort(vp);
 	camera.SetProjection(projection);
 	CMatrix3D scaleMat;
 	scaleMat.SetScaling(g_Renderer.GetHeight() / static_cast<float>(std::max(1, g_Renderer.GetWidth())), 1.0f, 1.0f);
 	camera.SetProjection(scaleMat * camera.GetProjection());
 
 	CVector4D camPlane(0, 1, 0, -wm.m_WaterHeight + 0.5f);
 	SetObliqueFrustumClipping(camera, camPlane);
 }
 
 void CSceneRenderer::ComputeRefractionCamera(CCamera& camera, const CBoundingBoxAligned& scissor) const
 {
 	WaterManager& wm = m->waterManager;
 
 	CMatrix3D projection;
 	if (m_ViewCamera.GetProjectionType() == CCamera::ProjectionType::PERSPECTIVE)
 	{
 		const float aspectRatio = 1.0f;
 		// Expand fov slightly since ripples can reflect parts of the scene that
 		// are slightly outside the normal camera view, and we want to avoid any
 		// noticeable edge-filtering artifacts
 		projection.SetPerspective(m_ViewCamera.GetFOV() * 1.05f, aspectRatio, m_ViewCamera.GetNearPlane(), m_ViewCamera.GetFarPlane());
 	}
 	else
 		projection = m_ViewCamera.GetProjection();
 
 	camera = m_ViewCamera;
 
 	// Temporarily change the camera to make it render to a view port the size of the
 	// water texture, stretch the image according to our aspect ratio so it covers
 	// the whole screen despite being rendered into a square, and cover slightly more
 	// of the view so we can see wavy refractions of slightly off-screen objects.
 	camera.UpdateFrustum(scissor);
 	camera.ClipFrustum(CVector4D(0, -1, 0, wm.m_WaterHeight + 0.5f));	// add some to avoid artifacts near steep shores.
 
 	SViewPort vp;
 	vp.m_Height = wm.m_RefTextureSize;
 	vp.m_Width = wm.m_RefTextureSize;
 	vp.m_X = 0;
 	vp.m_Y = 0;
 	camera.SetViewPort(vp);
 	camera.SetProjection(projection);
 	CMatrix3D scaleMat;
 	scaleMat.SetScaling(g_Renderer.GetHeight() / static_cast<float>(std::max(1, g_Renderer.GetWidth())), 1.0f, 1.0f);
 	camera.SetProjection(scaleMat * camera.GetProjection());
 }
 
 // RenderReflections: render the water reflections to the reflection texture.
 // m_ViewCamera is temporarily replaced by the reflection camera computed from
 // the given scissor; the camera and the viewport are restored before returning.
 // Nothing is drawn if the water scissor seen from the reflection camera is empty.
 void CSceneRenderer::RenderReflections(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, const CBoundingBoxAligned& scissor)
 {
 	PROFILE3_GPU("water reflections");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render water reflections");
 
 	WaterManager& wm = m->waterManager;
 
 	// Remember old camera
 	CCamera normalCamera = m_ViewCamera;
 
 	ComputeReflectionCamera(m_ViewCamera, scissor);
 	const CBoundingBoxAligned reflectionScissor =
 		m->terrainRenderer.ScissorWater(CULL_DEFAULT, m_ViewCamera);
 	if (reflectionScissor.IsEmpty())
 	{
 		// No water area for the reflection camera: undo the camera change and skip the pass.
 		m_ViewCamera = normalCamera;
 		return;
 	}
 
 	g_Renderer.SetViewport(m_ViewCamera.GetViewPort());
 
 	// Save the model-view-projection matrix so the shaders can use it for projective texturing
 	wm.m_ReflectionMatrix = m_ViewCamera.GetViewProjection();
 
 	// The same size is used for both dimensions of the target.
 	float vpHeight = wm.m_RefTextureSize;
 	float vpWidth = wm.m_RefTextureSize;
 
 	// Convert the scissor bounds to a pixel rectangle: each coordinate is remapped
 	// with x * 0.5 + 0.5 (presumably from [-1, 1] to [0, 1] - TODO confirm) and
 	// scaled by the texture size; floor/ceil round outwards so the rectangle
 	// never becomes smaller than the bounds.
 	SScreenRect screenScissor;
 	screenScissor.x1 = static_cast<int>(floor((reflectionScissor[0].X * 0.5f + 0.5f) * vpWidth));
 	screenScissor.y1 = static_cast<int>(floor((reflectionScissor[0].Y * 0.5f + 0.5f) * vpHeight));
 	screenScissor.x2 = static_cast<int>(ceil((reflectionScissor[1].X * 0.5f + 0.5f) * vpWidth));
 	screenScissor.y2 = static_cast<int>(ceil((reflectionScissor[1].Y * 0.5f + 0.5f) * vpHeight));
 
 	Renderer::Backend::IDeviceCommandContext::Rect scissorRect;
 	scissorRect.x = screenScissor.x1;
 	scissorRect.y = screenScissor.y1;
 	scissorRect.width = screenScissor.x2 - screenScissor.x1;
 	scissorRect.height = screenScissor.y2 - screenScissor.y1;
 	deviceCommandContext->SetScissors(1, &scissorRect);
 
 	deviceCommandContext->SetGraphicsPipelineState(
 		Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc());
 	deviceCommandContext->BeginFramebufferPass(wm.m_ReflectionFramebuffer.get());
 	deviceCommandContext->ClearFramebuffer();
 
 	// Extend a copy of the caller's defines with PASS_REFLECTIONS for this pass only.
 	CShaderDefines reflectionsContext = context;
 	reflectionsContext.Add(str_PASS_REFLECTIONS, str_1);
 
 	// Render terrain and models
 	RenderPatches(deviceCommandContext, reflectionsContext, CULL_REFLECTIONS);
 	RenderModels(deviceCommandContext, reflectionsContext, CULL_REFLECTIONS);
 	RenderTransparentModels(deviceCommandContext, reflectionsContext, CULL_REFLECTIONS, TRANSPARENT);
 
 	// Particles are always oriented to face the camera in the vertex shader,
 	// so they don't need the inverted cull face.
 	if (g_RenderingOptions.GetParticles())
 	{
 		RenderParticles(deviceCommandContext, CULL_REFLECTIONS);
 	}
 
 	// Remove the scissor rectangle set above before ending the pass.
 	deviceCommandContext->SetScissors(0, nullptr);
 	deviceCommandContext->EndFramebufferPass();
 
 	// Reset old camera
 	m_ViewCamera = normalCamera;
 	g_Renderer.SetViewport(m_ViewCamera.GetViewPort());
 }
 
 // RenderRefractions: render the water refractions to the refraction texture.
 // m_ViewCamera is temporarily replaced by the refraction camera computed from
 // the given scissor; the camera and the viewport are restored before returning.
 // Nothing is drawn if the water scissor seen from the refraction camera is empty.
 void CSceneRenderer::RenderRefractions(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, const CBoundingBoxAligned &scissor)
 {
 	PROFILE3_GPU("water refractions");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render water refractions");
 
 	WaterManager& wm = m->waterManager;
 
 	// Remember old camera
 	CCamera normalCamera = m_ViewCamera;
 
 	ComputeRefractionCamera(m_ViewCamera, scissor);
 	const CBoundingBoxAligned refractionScissor =
 		m->terrainRenderer.ScissorWater(CULL_DEFAULT, m_ViewCamera);
 	if (refractionScissor.IsEmpty())
 	{
 		// No water area for the refraction camera: undo the camera change and skip the pass.
 		m_ViewCamera = normalCamera;
 		return;
 	}
 
 	// Clip plane with normal (0, -1, 0) and a fourth component of water height + 2.
 	// NOTE(review): presumably this discards geometry above the water surface,
 	// with a 2 unit margin - confirm against SetObliqueFrustumClipping.
 	CVector4D camPlane(0, -1, 0, wm.m_WaterHeight + 2.0f);
 	SetObliqueFrustumClipping(m_ViewCamera, camPlane);
 
 	g_Renderer.SetViewport(m_ViewCamera.GetViewPort());
 
 	// Save the model-view-projection matrix so the shaders can use it for projective texturing
 	wm.m_RefractionMatrix = m_ViewCamera.GetViewProjection();
 	wm.m_RefractionProjInvMatrix = m_ViewCamera.GetProjection().GetInverse();
 	// The camera orientation matrix is stored as the inverse view matrix.
 	wm.m_RefractionViewInvMatrix = m_ViewCamera.GetOrientation();
 
 	// The same size is used for both dimensions of the target.
 	float vpHeight = wm.m_RefTextureSize;
 	float vpWidth = wm.m_RefTextureSize;
 
 	// Convert the scissor bounds to a pixel rectangle: each coordinate is remapped
 	// with x * 0.5 + 0.5 (presumably from [-1, 1] to [0, 1] - TODO confirm) and
 	// scaled by the texture size; floor/ceil round outwards so the rectangle
 	// never becomes smaller than the bounds.
 	SScreenRect screenScissor;
 	screenScissor.x1 = static_cast<int>(floor((refractionScissor[0].X * 0.5f + 0.5f) * vpWidth));
 	screenScissor.y1 = static_cast<int>(floor((refractionScissor[0].Y * 0.5f + 0.5f) * vpHeight));
 	screenScissor.x2 = static_cast<int>(ceil((refractionScissor[1].X * 0.5f + 0.5f) * vpWidth));
 	screenScissor.y2 = static_cast<int>(ceil((refractionScissor[1].Y * 0.5f + 0.5f) * vpHeight));
 
 	Renderer::Backend::IDeviceCommandContext::Rect scissorRect;
 	scissorRect.x = screenScissor.x1;
 	scissorRect.y = screenScissor.y1;
 	scissorRect.width = screenScissor.x2 - screenScissor.x1;
 	scissorRect.height = screenScissor.y2 - screenScissor.y1;
 	deviceCommandContext->SetScissors(1, &scissorRect);
 
 	deviceCommandContext->SetGraphicsPipelineState(
 		Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc());
 	deviceCommandContext->BeginFramebufferPass(wm.m_RefractionFramebuffer.get());
 	deviceCommandContext->ClearFramebuffer();
 
 	// Render terrain and models
 	RenderPatches(deviceCommandContext, context, CULL_REFRACTIONS);
 
 	// Render debug-related terrain overlays to make it visible under water.
 	ITerrainOverlay::RenderOverlaysBeforeWater(deviceCommandContext);
 
 	RenderModels(deviceCommandContext, context, CULL_REFRACTIONS);
 	RenderTransparentModels(deviceCommandContext, context, CULL_REFRACTIONS, TRANSPARENT_OPAQUE);
 
 	// Remove the scissor rectangle set above before ending the pass.
 	deviceCommandContext->SetScissors(0, nullptr);
 	deviceCommandContext->EndFramebufferPass();
 
 	// Reset old camera
 	m_ViewCamera = normalCamera;
 	g_Renderer.SetViewport(m_ViewCamera.GetViewPort());
 }
 
 // RenderSilhouettes: draw the silhouette pass. Occluders (terrain patches and
 // models of CULL_SILHOUETTE_OCCLUDER) are rendered with MODE_SILHOUETTEOCCLUDER
 // defined, then the casters (CULL_SILHOUETTE_CASTER) with MODE_SILHOUETTEDISPLAY.
 void CSceneRenderer::RenderSilhouettes(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context)
 {
 	PROFILE3_GPU("silhouettes");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render silhouettes");
 
 	// Two variants of the caller's defines, one per half of the pass.
 	CShaderDefines contextOccluder = context;
 	contextOccluder.Add(str_MODE_SILHOUETTEOCCLUDER, str_1);
 
 	CShaderDefines contextDisplay = context;
 	contextDisplay.Add(str_MODE_SILHOUETTEDISPLAY, str_1);
 
 	// Render silhouettes of units hidden behind terrain or occluders.
 	// To avoid breaking the standard rendering of alpha-blended objects, this
 	// has to be done in a separate pass.
 	// First we render all occluders into depth, then render all units with
 	// inverted depth test so any behind an occluder will get drawn in a constant
 	// color.
 
 	deviceCommandContext->SetGraphicsPipelineState(
 		Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc());
 	// NOTE(review): presumably (color, depth, stencil), i.e. the color contents
 	// are kept while depth and stencil are cleared - confirm against ClearFramebuffer.
 	deviceCommandContext->ClearFramebuffer(false, true, true);
 
 	// Render occluders:
 
 	{
 		PROFILE("render patches");
 		m->terrainRenderer.RenderPatches(deviceCommandContext, CULL_SILHOUETTE_OCCLUDER, contextOccluder);
 	}
 
 	{
 		PROFILE("render model occluders");
 		m->CallModelRenderers(deviceCommandContext, contextOccluder, CULL_SILHOUETTE_OCCLUDER, 0);
 	}
 
 	{
 		PROFILE("render transparent occluders");
 		m->CallTranspModelRenderers(deviceCommandContext, contextOccluder, CULL_SILHOUETTE_OCCLUDER, 0);
 	}
 
 	// Since we can't sort, we'll use the stencil buffer to ensure we only draw
 	// a pixel once (using the color of whatever model happens to be drawn first).
 	{
 		PROFILE("render model casters");
 		m->CallModelRenderers(deviceCommandContext, contextDisplay, CULL_SILHOUETTE_CASTER, 0);
 	}
 
 	{
 		PROFILE("render transparent casters");
 		m->CallTranspModelRenderers(deviceCommandContext, contextDisplay, CULL_SILHOUETTE_CASTER, 0);
 	}
 }
 
 // RenderParticles: draw the particles submitted to the given cull group.
 // The last argument of the first call is true only in WIREFRAME mode
 // (presumably a wireframe flag - TODO confirm against ParticleRenderer).
 void CSceneRenderer::RenderParticles(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	int cullGroup)
 {
 	PROFILE3_GPU("particles");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render particles");
 
 	m->particleRenderer.RenderParticles(
 		deviceCommandContext, cullGroup, m_ModelRenderMode == WIREFRAME);
 
 	// In EDGED_FACES mode the particles are drawn a second time with the flag
 	// forced to true, followed by their bounds.
 	if (m_ModelRenderMode == EDGED_FACES)
 	{
 		m->particleRenderer.RenderParticles(
 			deviceCommandContext, cullGroup, true);
 		m->particleRenderer.RenderBounds(cullGroup);
 	}
 }
 
 // RenderSubmissions: force rendering of any batched objects.
 // Order of work, as visible below: update the sky, LOS, territory and minimap
 // textures; prepare and upload renderer data; render the offscreen passes
 // (shadow map, water reflections/refractions, foam occluders); render the main
 // pass into either the postprocessing output or the backbuffer; finally render
 // silhouettes, debug geometry and foreground overlays into the backbuffer.
 // waterScissor is the water bounding box computed by the caller; water passes
 // are skipped when its volume is not positive.
 void CSceneRenderer::RenderSubmissions(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CBoundingBoxAligned& waterScissor)
 {
 	PROFILE3("render submissions");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render submissions");
 
 	m->skyManager.LoadAndUploadSkyTexturesIfNeeded(deviceCommandContext);
 
 	GetScene().GetLOSTexture().InterpolateLOS(deviceCommandContext);
 	GetScene().GetTerritoryTexture().UpdateIfNeeded(deviceCommandContext);
 	GetScene().GetMiniMapTexture().Render(
 		deviceCommandContext, GetScene().GetLOSTexture(), GetScene().GetTerritoryTexture());
 
 	CShaderDefines context = m->globalContext;
 
 	// The main pass below always draws the default cull group.
 	int cullGroup = CULL_DEFAULT;
 
 	// Set the camera
 	g_Renderer.SetViewport(m_ViewCamera.GetViewPort());
 
 	// Prepare model renderers
 	{
 	PROFILE3("prepare models");
 	m->Model.NormalSkinned->PrepareModels();
 	m->Model.TranspSkinned->PrepareModels();
 	// The unskinned renderers may be the same objects as the skinned ones;
 	// the comparisons avoid preparing the same renderer twice.
 	if (m->Model.NormalUnskinned != m->Model.NormalSkinned)
 		m->Model.NormalUnskinned->PrepareModels();
 	if (m->Model.TranspUnskinned != m->Model.TranspSkinned)
 		m->Model.TranspUnskinned->PrepareModels();
 	}
 
 	m->terrainRenderer.PrepareForRendering();
 
 	m->overlayRenderer.PrepareForRendering();
 
 	m->particleRenderer.PrepareForRendering(context);
 
+	{
+		PROFILE3("upload models");
+		m->Model.NormalSkinned->UploadModels(deviceCommandContext);
+		m->Model.TranspSkinned->UploadModels(deviceCommandContext);
+		if (m->Model.NormalUnskinned != m->Model.NormalSkinned)
+			m->Model.NormalUnskinned->UploadModels(deviceCommandContext);
+		if (m->Model.TranspUnskinned != m->Model.TranspSkinned)
+			m->Model.TranspUnskinned->UploadModels(deviceCommandContext);
+	}
+
+	m->overlayRenderer.Upload(deviceCommandContext);
+
 	if (g_RenderingOptions.GetShadows())
 	{
 		RenderShadowMap(deviceCommandContext, context);
 	}
 
 	if (m->waterManager.m_RenderWater)
 	{
 		if (waterScissor.GetVolume() > 0 && m->waterManager.WillRenderFancyWater())
 		{
 			m->waterManager.UpdateQuality();
 
 			PROFILE3_GPU("water scissor");
 			if (g_RenderingOptions.GetWaterReflection())
 				RenderReflections(deviceCommandContext, context, waterScissor);
 
 			if (g_RenderingOptions.GetWaterRefraction())
 				RenderRefractions(deviceCommandContext, context, waterScissor);
 
 			if (g_RenderingOptions.GetWaterFancyEffects())
 				m->terrainRenderer.RenderWaterFoamOccluders(deviceCommandContext, cullGroup);
 		}
 	}
 
 	deviceCommandContext->SetGraphicsPipelineState(
 		Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc());
 
 	// Start the main pass: either through the postprocessing manager or
 	// directly on the current backbuffer.
 	CPostprocManager& postprocManager = g_Renderer.GetPostprocManager();
 	if (postprocManager.IsEnabled())
 	{
 		// We have to update the post process manager with real near/far planes
 		// that we use for the scene rendering.
 		postprocManager.SetDepthBufferClipPlanes(
 			m_ViewCamera.GetNearPlane(), m_ViewCamera.GetFarPlane()
 		);
 		postprocManager.Initialize();
 		postprocManager.CaptureRenderOutput(deviceCommandContext);
 	}
 	else
 	{
 		deviceCommandContext->BeginFramebufferPass(
 			deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 	}
 
 	{
 		PROFILE3_GPU("clear buffers");
 		// We don't need to clear the color attachment of the framebuffer if the sky
 		// is going to be rendered. Because it covers the whole view.
 		deviceCommandContext->ClearFramebuffer(!m->skyManager.IsSkyVisible(), true, true);
 	}
 
 	m->skyManager.RenderSky(deviceCommandContext);
 
 	// render submitted patches and models
 	RenderPatches(deviceCommandContext, context, cullGroup);
 
 	// render debug-related terrain overlays
 	ITerrainOverlay::RenderOverlaysBeforeWater(deviceCommandContext);
 
 	// render other debug-related overlays before water (so they can be seen when underwater)
 	m->overlayRenderer.RenderOverlaysBeforeWater(deviceCommandContext);
 
 	RenderModels(deviceCommandContext, context, cullGroup);
 
 	// render water
 	if (m->waterManager.m_RenderWater && g_Game && waterScissor.GetVolume() > 0)
 	{
 		if (m->waterManager.WillRenderFancyWater())
 		{
 			// Render transparent stuff, but only the solid parts that can occlude block water.
 			RenderTransparentModels(deviceCommandContext, context, cullGroup, TRANSPARENT_OPAQUE);
 
 			m->terrainRenderer.RenderWater(deviceCommandContext, context, cullGroup, &m->shadow);
 
 			// Render transparent stuff again, but only the blended parts that overlap water.
 			RenderTransparentModels(deviceCommandContext, context, cullGroup, TRANSPARENT_BLEND);
 		}
 		else
 		{
 			m->terrainRenderer.RenderWater(deviceCommandContext, context, cullGroup, &m->shadow);
 
 			// Render transparent stuff, so it can overlap models/terrain.
 			RenderTransparentModels(deviceCommandContext, context, cullGroup, TRANSPARENT);
 		}
 	}
 	else
 	{
 		// render transparent stuff, so it can overlap models/terrain
 		RenderTransparentModels(deviceCommandContext, context, cullGroup, TRANSPARENT);
 	}
 
 	// render debug-related terrain overlays
 	ITerrainOverlay::RenderOverlaysAfterWater(deviceCommandContext, cullGroup);
 
 	// render some other overlays after water (so they can be displayed on top of water)
 	m->overlayRenderer.RenderOverlaysAfterWater(deviceCommandContext);
 
 	// particles are transparent so render after water
 	if (g_RenderingOptions.GetParticles())
 	{
 		RenderParticles(deviceCommandContext, cullGroup);
 	}
 
 	// With postprocessing enabled, end the current pass, apply the effects and
 	// start a backbuffer pass so the remaining draws below go to the backbuffer.
 	if (postprocManager.IsEnabled())
 	{
 		deviceCommandContext->EndFramebufferPass();
 
 		if (g_Renderer.GetPostprocManager().IsMultisampleEnabled())
 			g_Renderer.GetPostprocManager().ResolveMultisampleFramebuffer(deviceCommandContext);
 
 		postprocManager.ApplyPostproc(deviceCommandContext);
 		postprocManager.ReleaseRenderOutput(deviceCommandContext);
 		deviceCommandContext->BeginFramebufferPass(
 			deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 	}
 
 	if (g_RenderingOptions.GetSilhouettes())
 	{
 		RenderSilhouettes(deviceCommandContext, context);
 	}
 
 	// render debug lines
 	if (g_RenderingOptions.GetDisplayFrustum())
 		DisplayFrustum();
 
 	if (g_RenderingOptions.GetDisplayShadowsFrustum())
 		m->shadow.RenderDebugBounds();
 
 	m->silhouetteRenderer.RenderDebugBounds(deviceCommandContext);
 	m->silhouetteRenderer.RenderDebugOverlays(deviceCommandContext);
 
 	// render overlays that should appear on top of all other objects
 	m->overlayRenderer.RenderForegroundOverlays(deviceCommandContext, m_ViewCamera);
 
 	// Ends the backbuffer pass started above (in either branch).
 	deviceCommandContext->EndFramebufferPass();
 }
 
 // EndFrame: forward the end-of-frame notification to every sub-renderer.
 void CSceneRenderer::EndFrame()
 {
 	// empty lists
 	m->terrainRenderer.EndFrame();
 	m->overlayRenderer.EndFrame();
 	m->particleRenderer.EndFrame();
 	m->silhouetteRenderer.EndFrame();
 
 	// Finish model renderers
 	m->Model.NormalSkinned->EndFrame();
 	m->Model.TranspSkinned->EndFrame();
 	// The unskinned renderers may be the same objects as the skinned ones;
 	// the comparisons avoid calling EndFrame twice on one renderer.
 	if (m->Model.NormalUnskinned != m->Model.NormalSkinned)
 		m->Model.NormalUnskinned->EndFrame();
 	if (m->Model.TranspUnskinned != m->Model.TranspSkinned)
 		m->Model.TranspUnskinned->EndFrame();
 }
 
 // DisplayFrustum: draw the frustum of the cull camera with the debug renderer,
 // once in white at 0.25 alpha and once fully opaque with the extra flag set
 // (presumably filled faces followed by a wireframe outline - TODO confirm).
 void CSceneRenderer::DisplayFrustum()
 {
 	g_Renderer.GetDebugRenderer().DrawCameraFrustum(m_CullCamera, CColor(1.0f, 1.0f, 1.0f, 0.25f), 2);
 	g_Renderer.GetDebugRenderer().DrawCameraFrustum(m_CullCamera, CColor(1.0f, 1.0f, 1.0f, 1.0f), 2, true);
 }
 
 // Text overlay rendering
 // Currently the only text overlay is the terrain priorities of the default
 // cull group, drawn on the given canvas when m_DisplayTerrainPriorities is set.
 void CSceneRenderer::RenderTextOverlays(CCanvas2D& canvas)
 {
 	PROFILE3_GPU("text overlays");
 
 	if (m_DisplayTerrainPriorities)
 		m->terrainRenderer.RenderPriorities(canvas, CULL_DEFAULT);
 }
 
 // SetSceneCamera: setup projection and transform of camera and adjust viewport to current view
 // The camera always represents the actual camera used to render a scene, not any virtual camera
 // used for shadow rendering or reflections.
 // viewCamera is stored for rendering and cullCamera for culling; when shadows
 // are enabled the shadow map is set up from the cull camera and the sun direction.
 void CSceneRenderer::SetSceneCamera(const CCamera& viewCamera, const CCamera& cullCamera)
 {
 	m_ViewCamera = viewCamera;
 	m_CullCamera = cullCamera;
 
 	if (g_RenderingOptions.GetShadows())
 		m->shadow.SetupFrame(m_CullCamera, m_LightEnv->GetSunDir());
 }
 
 // Submit a terrain patch to the current cull group.
 void CSceneRenderer::Submit(CPatch* patch)
 {
 	// In the default group the patch receives shadows and occludes silhouettes.
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 	{
 		m->shadow.AddShadowReceiverBound(patch->GetWorldBounds());
 		m->silhouetteRenderer.AddOccluder(patch);
 	}
 
 	// In a shadow cascade group the patch bounds extend that cascade's caster bounds.
 	if (CULL_SHADOWS_CASCADE_0 <= m_CurrentCullGroup && m_CurrentCullGroup <= CULL_SHADOWS_CASCADE_3)
 	{
 		const int cascade = m_CurrentCullGroup - CULL_SHADOWS_CASCADE_0;
 		m->shadow.AddShadowCasterBound(cascade, patch->GetWorldBounds());
 	}
 
 	m->terrainRenderer.Submit(m_CurrentCullGroup, patch);
 }
 
 // Submit a line overlay; it is forwarded to the overlay renderer only while
 // the default cull group is current.
 void CSceneRenderer::Submit(SOverlayLine* overlay)
 {
 	// Overlays are only needed in the default cull group for now,
 	// so just ignore submissions to any other group
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 		m->overlayRenderer.Submit(overlay);
 }
 
 // Submit a textured line overlay; ignored unless the default cull group is current.
 void CSceneRenderer::Submit(SOverlayTexturedLine* overlay)
 {
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 		m->overlayRenderer.Submit(overlay);
 }
 
 // Submit a sprite overlay; ignored unless the default cull group is current.
 void CSceneRenderer::Submit(SOverlaySprite* overlay)
 {
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 		m->overlayRenderer.Submit(overlay);
 }
 
 // Submit a quad overlay; ignored unless the default cull group is current.
 void CSceneRenderer::Submit(SOverlayQuad* overlay)
 {
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 		m->overlayRenderer.Submit(overlay);
 }
 
 // Submit a sphere overlay; ignored unless the default cull group is current.
 void CSceneRenderer::Submit(SOverlaySphere* overlay)
 {
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 		m->overlayRenderer.Submit(overlay);
 }
 
 // Submit a decal to the terrain renderer for the current cull group.
 void CSceneRenderer::Submit(CModelDecal* decal)
 {
 	// Decals can't cast shadows since they're flat on the terrain.
 	// They can receive shadows, but the terrain under them will have
 	// already been passed to AddShadowCasterBound, so don't bother
 	// doing it again here.
 
 	m->terrainRenderer.Submit(m_CurrentCullGroup, decal);
 }
 
 // Submit a particle emitter to the particle renderer for the current cull group.
 void CSceneRenderer::Submit(CParticleEmitter* emitter)
 {
 	m->particleRenderer.Submit(m_CurrentCullGroup, emitter);
 }
 
 // SubmitNonRecursive: register a single model for the current cull group and
 // route it to one of the four model renderers, chosen by whether its material
 // uses alpha blending and whether its model definition has bones.
 void CSceneRenderer::SubmitNonRecursive(CModel* model)
 {
 	// In the default group the model receives shadows and may take part in
 	// the silhouette pass, depending on its flags.
 	if (m_CurrentCullGroup == CULL_DEFAULT)
 	{
 		m->shadow.AddShadowReceiverBound(model->GetWorldBounds());
 
 		if (model->GetFlags() & MODELFLAG_SILHOUETTE_OCCLUDER)
 			m->silhouetteRenderer.AddOccluder(model);
 		if (model->GetFlags() & MODELFLAG_SILHOUETTE_DISPLAY)
 			m->silhouetteRenderer.AddCaster(model);
 	}
 
 	if (CULL_SHADOWS_CASCADE_0 <= m_CurrentCullGroup && m_CurrentCullGroup <= CULL_SHADOWS_CASCADE_3)
 	{
 		// Models that don't cast shadows are dropped entirely from shadow groups.
 		if (!(model->GetFlags() & MODELFLAG_CASTSHADOWS))
 			return;
 
 		const int cascade = m_CurrentCullGroup - CULL_SHADOWS_CASCADE_0;
 		m->shadow.AddShadowCasterBound(cascade, model->GetWorldBounds());
 	}
 
 	// A model with at least one bone goes to a skinned renderer.
 	bool requiresSkinning = (model->GetModelDef()->GetNumBones() != 0);
 
 	if (model->GetMaterial().UsesAlphaBlending())
 	{
 		if (requiresSkinning)
 			m->Model.TranspSkinned->Submit(m_CurrentCullGroup, model);
 		else
 			m->Model.TranspUnskinned->Submit(m_CurrentCullGroup, model);
 	}
 	else
 	{
 		if (requiresSkinning)
 			m->Model.NormalSkinned->Submit(m_CurrentCullGroup, model);
 		else
 			m->Model.NormalUnskinned->Submit(m_CurrentCullGroup, model);
 	}
 }
 
 // Render the given scene
 // The scene's objects are enumerated once per cull group (default, shadow
 // cascades, reflections, refractions) with m_CurrentCullGroup set accordingly,
 // so the Submit callbacks sort them into groups; RenderSubmissions then draws
 // everything. m_CurrentScene is only set for the duration of this call.
 void CSceneRenderer::RenderScene(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext, Scene& scene)
 {
 	m_CurrentScene = &scene;
 
 	CFrustum frustum = m_CullCamera.GetFrustum();
 
 	m_CurrentCullGroup = CULL_DEFAULT;
 
 	scene.EnumerateObjects(frustum, this);
 
 	m->particleManager.RenderSubmit(*this, frustum);
 
 	if (g_RenderingOptions.GetSilhouettes())
 	{
 		m->silhouetteRenderer.ComputeSubmissions(m_ViewCamera);
 
 		// The silhouette renderer submits into three groups in turn.
 		m_CurrentCullGroup = CULL_DEFAULT;
 		m->silhouetteRenderer.RenderSubmitOverlays(*this);
 
 		m_CurrentCullGroup = CULL_SILHOUETTE_OCCLUDER;
 		m->silhouetteRenderer.RenderSubmitOccluders(*this);
 
 		m_CurrentCullGroup = CULL_SILHOUETTE_CASTER;
 		m->silhouetteRenderer.RenderSubmitCasters(*this);
 	}
 
 	if (g_RenderingOptions.GetShadows())
 	{
 		// NOTE(review): the bound is inclusive, so GetCascadeCount() + 1 cull
 		// groups are enumerated - confirm this matches the meaning of
 		// GetCascadeCount() and stays within CULL_SHADOWS_CASCADE_3.
 		for (int cascade = 0; cascade <= m->shadow.GetCascadeCount(); ++cascade)
 		{
 			m_CurrentCullGroup = CULL_SHADOWS_CASCADE_0 + cascade;
 			const CFrustum shadowFrustum = m->shadow.GetShadowCasterCullFrustum(cascade);
 			scene.EnumerateObjects(shadowFrustum, this);
 		}
 	}
 
 	// Stays default-constructed when water rendering is disabled.
 	CBoundingBoxAligned waterScissor;
 	if (m->waterManager.m_RenderWater)
 	{
 		waterScissor = m->terrainRenderer.ScissorWater(CULL_DEFAULT, m_ViewCamera);
 
 		if (waterScissor.GetVolume() > 0 && m->waterManager.WillRenderFancyWater())
 		{
 			if (g_RenderingOptions.GetWaterReflection())
 			{
 				m_CurrentCullGroup = CULL_REFLECTIONS;
 
 				CCamera reflectionCamera;
 				ComputeReflectionCamera(reflectionCamera, waterScissor);
 
 				scene.EnumerateObjects(reflectionCamera.GetFrustum(), this);
 			}
 
 			if (g_RenderingOptions.GetWaterRefraction())
 			{
 				m_CurrentCullGroup = CULL_REFRACTIONS;
 
 				CCamera refractionCamera;
 				ComputeRefractionCamera(refractionCamera, waterScissor);
 
 				scene.EnumerateObjects(refractionCamera.GetFrustum(), this);
 			}
 
 			// Render the waves to the Fancy effects texture
 			m->waterManager.RenderWaves(deviceCommandContext, frustum);
 		}
 	}
 
 	// Submission is finished: -1 matches none of the cull groups tested in the Submit functions.
 	m_CurrentCullGroup = -1;
 
 	RenderSubmissions(deviceCommandContext, waterScissor);
 
 	m_CurrentScene = NULL;
 }
 
 // GetScene: return the scene currently being rendered. Asserts (ENSURE) that
 // a scene is set, which RenderScene does only for the duration of its call.
 Scene& CSceneRenderer::GetScene()
 {
 	ENSURE(m_CurrentScene);
 	return *m_CurrentScene;
 }
 
 // MakeShadersDirty: flag the water manager as needing a reload.
 void CSceneRenderer::MakeShadersDirty()
 {
 	m->waterManager.m_NeedsReloading = true;
 }
 
 // GetWaterManager: access the water manager owned by this renderer.
 WaterManager& CSceneRenderer::GetWaterManager()
 {
 	return m->waterManager;
 }
 
 // GetSkyManager: access the sky manager owned by this renderer.
 SkyManager& CSceneRenderer::GetSkyManager()
 {
 	return m->skyManager;
 }
 
 // GetParticleManager: access the particle manager owned by this renderer.
 CParticleManager& CSceneRenderer::GetParticleManager()
 {
 	return m->particleManager;
 }
 
 // GetTerrainRenderer: access the terrain renderer owned by this renderer.
 TerrainRenderer& CSceneRenderer::GetTerrainRenderer()
 {
 	return m->terrainRenderer;
 }
 
 // GetMaterialManager: access the material manager owned by this renderer.
 CMaterialManager& CSceneRenderer::GetMaterialManager()
 {
 	return m->materialManager;
 }
 
 // GetShadowMap: access the shadow map owned by this renderer.
 ShadowMap& CSceneRenderer::GetShadowMap()
 {
 	return m->shadow;
 }
 
 // ResetState: drop renderer state left over from a previous game; currently
 // this only clears the particle manager's unattached emitters.
 void CSceneRenderer::ResetState()
 {
 	// Clear all emitters, that were created in previous games
 	GetParticleManager().ClearUnattachedEmitters();
 }
Index: ps/trunk/source/renderer/SkyManager.cpp
===================================================================
--- ps/trunk/source/renderer/SkyManager.cpp	(revision 27181)
+++ ps/trunk/source/renderer/SkyManager.cpp	(revision 27182)
@@ -1,351 +1,348 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/SkyManager.h"
 
 #include "graphics/LightEnv.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "lib/bits.h"
 #include "lib/tex/tex.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/ConfigDB.h"
 #include "ps/CStr.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Filesystem.h"
 #include "ps/Game.h"
 #include "ps/VideoMode.h"
 #include "renderer/backend/IDevice.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/RenderingOptions.h"
 
 #include <algorithm>
 
 // Creates the vertex array as a VERTEX buffer (second argument false) and
 // reads the "showsky" config value into m_SkyVisible.
 SkyManager::SkyManager()
 	: m_VertexArray(Renderer::Backend::IBuffer::Type::VERTEX, false)
 {
 	CFG_GET_VAL("showsky", m_SkyVisible);
 }
 
 // Lazily creates the sky cube geometry and the sky cubemap texture.
 // The texture part is skipped once m_SkyTextureCube is set. While loading,
 // m_SkyTextureCube points to the texture manager's black cube texture, so a
 // failed load (reported via LOGERROR) leaves that texture in place and is not
 // retried until m_SkyTextureCube is reset (see SetSkySet).
 void SkyManager::LoadAndUploadSkyTexturesIfNeeded(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	if (m_VertexArray.GetNumberOfVertices() == 0)
 		CreateSkyCube();
 
 	if (m_SkyTextureCube)
 		return;
 
 	m_SkyTextureCube = g_Renderer.GetTextureManager().GetBlackTextureCube();
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Load Sky Textures");
 	// File base names, one per cube face in upload order. "top" is listed
 	// twice (presumably reused for the face looking down - TODO confirm).
 	static const CStrW images[NUMBER_OF_TEXTURES + 1] = {
 		L"front",
 		L"back",
 		L"top",
 		L"top",
 		L"right",
 		L"left"
 	};
 
 	/*for (size_t i = 0; i < ARRAY_SIZE(m_SkyTexture); ++i)
 	{
 		VfsPath path = VfsPath("art/textures/skies") / m_SkySet / (Path::String(s_imageNames[i])+L".dds");
 
 		CTextureProperties textureProps(path);
 		textureProps.SetWrap(GL_CLAMP_TO_EDGE);
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_SkyTexture[i] = texture;
 	}*/
 
 	///////////////////////////////////////////////////////////////////////////
 	// HACK: THE HORRIBLENESS HERE IS OVER 9000. The following code is a HUGE hack and will be removed completely
 	// as soon as all the hardcoded GL_TEXTURE_2D references are corrected in the TextureManager/OGL/tex libs.
 
 	Tex textures[NUMBER_OF_TEXTURES + 1];
 
 	// Load, decode and validate every face before creating the GPU texture.
 	for (size_t i = 0; i < NUMBER_OF_TEXTURES + 1; ++i)
 	{
 		VfsPath path = VfsPath("art/textures/skies") / m_SkySet / (Path::String(images[i]) + L".dds");
 
 		std::shared_ptr<u8> file;
 		size_t fileSize;
 		if (g_VFS->LoadFile(path, file, fileSize) != INFO::OK)
 		{
 			// Fall back to the ".dds.cached.dds" variant of the same file.
 			path = VfsPath("art/textures/skies") / m_SkySet / (Path::String(images[i]) + L".dds.cached.dds");
 			if (g_VFS->LoadFile(path, file, fileSize) != INFO::OK)
 			{
 				LOGERROR("Error creating sky cubemap '%s', can't load file: '%s'.", m_SkySet.ToUTF8().c_str(), path.string8().c_str());
 				return;
 			}
 		}
 
 		// Request the bottom-up and alpha flags and drop the DXT and mipmap flags
 		// (presumably yielding uncompressed 4-byte pixels, as the copy loop and
 		// the R8G8B8A8 upload below expect - TODO confirm).
 		if (textures[i].decode(file, fileSize) != INFO::OK ||
 			textures[i].transform_to((textures[i].m_Flags | TEX_BOTTOM_UP | TEX_ALPHA) & ~(TEX_DXT | TEX_MIPMAPS)) != INFO::OK)
 		{
 			LOGERROR("Error creating sky cubemap '%s', can't decode file: '%s'.", m_SkySet.ToUTF8().c_str(), path.string8().c_str());
 			return;
 		}
 
 		if (!is_pow2(textures[i].m_Width) || !is_pow2(textures[i].m_Height))
 		{
 			LOGERROR("Error creating sky cubemap '%s', cube textures should have power of 2 sizes.", m_SkySet.ToUTF8().c_str());
 			return;
 		}
 
 		// All faces must match the size of the first one, which defines the cubemap size.
 		if (textures[i].m_Width != textures[0].m_Width || textures[i].m_Height != textures[0].m_Height)
 		{
 			LOGERROR("Error creating sky cubemap '%s', cube textures have different sizes.", m_SkySet.ToUTF8().c_str());
 			return;
 		}
 	}
 
 	std::unique_ptr<Renderer::Backend::ITexture> skyCubeMap =
 		g_VideoMode.GetBackendDevice()->CreateTexture("SkyCubeMap",
 			Renderer::Backend::ITexture::Type::TEXTURE_CUBE,
 			Renderer::Backend::ITexture::Usage::TRANSFER_DST |
 				Renderer::Backend::ITexture::Usage::SAMPLED,
 			Renderer::Backend::Format::R8G8B8A8_UNORM, textures[0].m_Width, textures[0].m_Height,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::LINEAR,
 				Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE), 1, 1);
 
 	// Scratch buffer reused for the rotated faces.
 	std::vector<u8> rotated;
 	for (size_t i = 0; i < NUMBER_OF_TEXTURES + 1; ++i)
 	{
 		u8* data = textures[i].get_data();
 
 		// We need to rotate the side if it's looking up or down.
 		// TODO: maybe it should be done during texture conversion.
 		if (i == 2 || i == 3)
 		{
 			rotated.resize(textures[i].m_DataSize);
 
 			// Destination pixel (x, y) is copied from source pixel (y, width - 1 - x),
 			// 4 bytes per pixel.
 			// NOTE(review): invY indexes rows but is derived from m_Width, which
 			// appears to assume square faces; only power-of-two and equal-to-face-0
 			// sizes are checked above - confirm non-square input cannot reach here.
 			for (size_t y = 0; y < textures[i].m_Height; ++y)
 			{
 				for (size_t x = 0; x < textures[i].m_Width; ++x)
 				{
 					const size_t invX = y;
 					const size_t invY = textures[i].m_Width - x - 1;
 
 					rotated[(y * textures[i].m_Width + x) * 4 + 0] = data[(invY * textures[i].m_Width + invX) * 4 + 0];
 					rotated[(y * textures[i].m_Width + x) * 4 + 1] = data[(invY * textures[i].m_Width + invX) * 4 + 1];
 					rotated[(y * textures[i].m_Width + x) * 4 + 2] = data[(invY * textures[i].m_Width + invX) * 4 + 2];
 					rotated[(y * textures[i].m_Width + x) * 4 + 3] = data[(invY * textures[i].m_Width + invX) * 4 + 3];
 				}
 			}
 
 			// The trailing (0, i) presumably selects mip level 0 and cube face i - TODO confirm.
 			deviceCommandContext->UploadTexture(
 				skyCubeMap.get(), Renderer::Backend::Format::R8G8B8A8_UNORM,
 				&rotated[0], textures[i].m_DataSize, 0, i);
 		}
 		else
 		{
 			deviceCommandContext->UploadTexture(
 				skyCubeMap.get(), Renderer::Backend::Format::R8G8B8A8_UNORM,
 				data, textures[i].m_DataSize, 0, i);
 		}
 	}
 
 	// Replace the temporary black cube with the loaded cubemap.
 	m_SkyTextureCube = g_Renderer.GetTextureManager().WrapBackendTexture(std::move(skyCubeMap));
 	///////////////////////////////////////////////////////////////////////////
 }
 
 // Returns the backend texture of the current sky cubemap.
 // NOTE(review): m_SkyTextureCube is dereferenced without a null check, while
 // SetSkySet resets it - confirm callers only use this after
 // LoadAndUploadSkyTexturesIfNeeded has run.
 Renderer::Backend::ITexture* SkyManager::GetSkyCube()
 {
 	return m_SkyTextureCube->GetBackendTexture();
 }
 
 // Selects the sky set by name. Does nothing if the name is unchanged;
 // otherwise the current cubemap is released so that the next
 // LoadAndUploadSkyTexturesIfNeeded call loads the new set.
 void SkyManager::SetSkySet(const CStrW& newSet)
 {
 	if (newSet == m_SkySet)
 		return;
 
 	m_SkyTextureCube.reset();
 
 	m_SkySet = newSet;
 }
 
 // Returns the sorted names of all subdirectories of art/textures/skies/.
 // If the directory cannot be listed, logs an error and returns a single-element
 // list holding the current sky set.
 std::vector<CStrW> SkyManager::GetSkySets() const
 {
 	std::vector<CStrW> skies;
 
 	// Find all subdirectories in art/textures/skies
 
 	const VfsPath path(L"art/textures/skies/");
 	DirectoryNames subdirectories;
 	if (g_VFS->GetDirectoryEntries(path, 0, &subdirectories) != INFO::OK)
 	{
 		LOGERROR("Error opening directory '%s'", path.string8());
 		return std::vector<CStrW>(1, GetSkySet()); // just return what we currently have
 	}
 
 	for(size_t i = 0; i < subdirectories.size(); i++)
 		skies.push_back(subdirectories[i].string());
 	sort(skies.begin(), skies.end());
 
 	return skies;
 }
 
 // Draws the sky cube with the sky_simple effect, centered on the view camera.
 // Does nothing when the sky is hidden, no sky set has been chosen or no
 // cubemap texture is available.
 void SkyManager::RenderSky(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
 {
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render sky");
 
 	if (!m_SkyVisible)
 		return;
 
 	// Do nothing unless SetSkySet was called
 	if (m_SkySet.empty() || !m_SkyTextureCube)
 		return;
 
 	const CCamera& camera = g_Renderer.GetSceneRenderer().GetViewCamera();
 
 	CShaderTechniquePtr skytech =
 		g_Renderer.GetShaderManager().LoadEffect(str_sky_simple);
 	deviceCommandContext->SetGraphicsPipelineState(
 		skytech->GetGraphicsPipelineStateDesc());
 	deviceCommandContext->BeginPass();
 	Renderer::Backend::IShaderProgram* shader = skytech->GetShader();
 	deviceCommandContext->SetTexture(
 		shader->GetBindingSlot(str_baseTex), m_SkyTextureCube->GetBackendTexture());
 
 	// Translate so the sky center is at the camera space origin.
 	CMatrix3D translate;
 	translate.SetTranslation(camera.GetOrientation().GetTranslation());
 
 	// Currently we have a hardcoded near plane in the projection matrix.
 	CMatrix3D scale;
 	scale.SetScaling(10.0f, 10.0f, 10.0f);
 
 	// Rotate so that the "left" face, which contains the brightest part of
 	// each skymap, is in the direction of the sun from our light
 	// environment.
 	CMatrix3D rotate;
 	rotate.SetYRotation(M_PI + g_Renderer.GetSceneRenderer().GetLightEnv().GetRotation());
 
 	// Applied to a vertex right to left: scale, rotate, translate, then view-projection.
 	const CMatrix3D transform = camera.GetViewProjection() * translate * rotate * scale;
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_transform), transform.AsFloatArray());
 
-	m_VertexArray.PrepareForRendering();
-	m_VertexArray.UploadIfNeeded(deviceCommandContext);
-
 	const uint32_t stride = m_VertexArray.GetStride();
 	const uint32_t firstVertexOffset = m_VertexArray.GetOffset() * stride;
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION, m_AttributePosition.format,
 		m_AttributePosition.offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::UV0, m_AttributeUV.format,
 		m_AttributeUV.offset, stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 	deviceCommandContext->SetVertexBuffer(
 		0, m_VertexArray.GetBuffer(), firstVertexOffset);
 
 	deviceCommandContext->Draw(0, m_VertexArray.GetNumberOfVertices());
 
 	deviceCommandContext->EndPass();
 }
 
 // Builds the sky cube geometry: 36 vertices (6 faces of 6 vertices each), each
 // with a three-float position and a three-float UV attribute. The data is
 // uploaded and the vertex array's backing store is freed afterwards.
 void SkyManager::CreateSkyCube()
 {
 	m_AttributePosition.format = Renderer::Backend::Format::R32G32B32_SFLOAT;
 	m_VertexArray.AddAttribute(&m_AttributePosition);
 
 	m_AttributeUV.format = Renderer::Backend::Format::R32G32B32_SFLOAT;
 	m_VertexArray.AddAttribute(&m_AttributeUV);
 
 	// 6 sides of cube with 6 vertices.
 	m_VertexArray.SetNumberOfVertices(6 * 6);
 	m_VertexArray.Layout();
 
 	VertexArrayIterator<CVector3D> attrPosition = m_AttributePosition.GetIterator<CVector3D>();
 	VertexArrayIterator<CVector3D> attrUV = m_AttributeUV.GetIterator<CVector3D>();
 
 // Writes one vertex and advances both iterators. The first three arguments
 // are the UV components and the last three the position. In every entry
 // below U equals -VX, V equals -VY and W equals -VZ.
 #define ADD_VERTEX(U, V, W, VX, VY, VZ) \
 	STMT( \
 		attrPosition->X = VX; \
 		attrPosition->Y = VY; \
 		attrPosition->Z = VZ; \
 		++attrPosition; \
 		attrUV->X = U; \
 		attrUV->Y = V; \
 		attrUV->Z = W; \
 		++attrUV;)
 
 	// Axis -X
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 
 	// Axis +X
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 
 	// Axis -Y
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 
 	// Axis +Y
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 
 	// Axis -Z
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 
 	// Axis +Z
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 #undef ADD_VERTEX
 
 	// Upload once here and release the backing store.
 	m_VertexArray.Upload();
 	m_VertexArray.FreeBackingStore();
 }
Index: ps/trunk/source/renderer/TexturedLineRData.cpp
===================================================================
--- ps/trunk/source/renderer/TexturedLineRData.cpp	(revision 27181)
+++ ps/trunk/source/renderer/TexturedLineRData.cpp	(revision 27182)
@@ -1,468 +1,468 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "TexturedLineRData.h"
 
 #include "graphics/ShaderProgram.h"
 #include "graphics/Terrain.h"
 #include "maths/Frustum.h"
 #include "maths/MathUtil.h"
 #include "maths/Quaternion.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/OverlayRenderer.h"
 #include "renderer/Renderer.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/system/SimContext.h"
 #include "simulation2/components/ICmpWaterManager.h"
 
 /* Note: this implementation uses g_VBMan directly rather than access it through the nicer VertexArray interface,
  * because it allows you to work with variable amounts of vertices and indices more easily. New code should prefer
  * to use VertexArray where possible, though. */
 
 // Draws the triangle list previously built by Update() with the given shader:
 // binds the line's base and mask textures and its color, describes the SVertex
 // layout to the command context and issues a single indexed draw call.
 // Returns without drawing if either buffer chunk is missing.
 void CTexturedLineRData::Render(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const SOverlayTexturedLine& line, Renderer::Backend::IShaderProgram* shader)
 {
 	if (!m_VB || !m_VBIndices)
 		return; // might have failed to allocate
 
 	// -- render main line quad strip ----------------------
 
 	line.m_TextureBase->UploadBackendTextureIfNeeded(deviceCommandContext);
 	line.m_TextureMask->UploadBackendTextureIfNeeded(deviceCommandContext);
 
-	m_VB->m_Owner->UploadIfNeeded(deviceCommandContext);
-	m_VBIndices->m_Owner->UploadIfNeeded(deviceCommandContext);
+	ENSURE(!m_VB->m_Owner->GetBuffer()->IsDynamic());
+	ENSURE(!m_VBIndices->m_Owner->GetBuffer()->IsDynamic());
 
 	deviceCommandContext->SetTexture(
 		shader->GetBindingSlot(str_baseTex), line.m_TextureBase->GetBackendTexture());
 	deviceCommandContext->SetTexture(
 		shader->GetBindingSlot(str_maskTex), line.m_TextureMask->GetBackendTexture());
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_objectColor), line.m_Color.AsFloatArray());
 
 	// All attributes are interleaved in one SVertex, so they share the stride.
 	const uint32_t stride = sizeof(CTexturedLineRData::SVertex);
 
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::POSITION,
 		Renderer::Backend::Format::R32G32B32_SFLOAT,
 		offsetof(CTexturedLineRData::SVertex, m_Position), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::UV0,
 		Renderer::Backend::Format::R32G32_SFLOAT,
 		offsetof(CTexturedLineRData::SVertex, m_UV), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 	// UV1 reads the very same m_UV member as UV0.
 	deviceCommandContext->SetVertexAttributeFormat(
 		Renderer::Backend::VertexAttributeStream::UV1,
 		Renderer::Backend::Format::R32G32_SFLOAT,
 		offsetof(CTexturedLineRData::SVertex, m_UV), stride,
 		Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 	deviceCommandContext->SetVertexBuffer(0, m_VB->m_Owner->GetBuffer(), 0);
 
 	deviceCommandContext->SetIndexBuffer(m_VBIndices->m_Owner->GetBuffer());
 	deviceCommandContext->DrawIndexed(m_VBIndices->m_Index, m_VBIndices->m_Count, 0);
 
 	// The index chunk holds triangles, hence three indices per counted triangle.
 	g_Renderer.GetStats().m_DrawCalls++;
 	g_Renderer.GetStats().m_OverlayTris += m_VBIndices->m_Count/3;
 }
 
 // Rebuilds the vertex and index data for the given overlay line and stores it
 // in freshly allocated chunks (m_VB / m_VBIndices). Both handles are reset
 // first, so after any early return the line has no buffers and Render() draws
 // nothing. The line must have at least two coordinates (checked with ENSURE).
 // Closed lines get a final quad joining the last and first points; open lines
 // get a start and an end cap generated by CreateLineCap(). The bounding box of
 // all generated vertices is stored in m_BoundingBox.
 void CTexturedLineRData::Update(const SOverlayTexturedLine& line)
 {
 	m_VBIndices.Reset();
 	m_VB.Reset();
 
 	if (!line.m_SimContext)
 	{
 		debug_warn(L"[TexturedLineRData] No SimContext set for textured overlay line, cannot render (no terrain data)");
 		return;
 	}
 
 	float v = 0.f;
 	std::vector<SVertex> vertices;
 	std::vector<u16> indices;
 
 	const size_t n = line.m_Coords.size(); // number of line points
 	bool closed = line.m_Closed;
 
 	ENSURE(n >= 2); // minimum needed to avoid errors (also minimum value to make sense, can't draw a line between 1 point)
 
 	// In each iteration, p1 is the position of vertex i, p0 is i-1, p2 is i+1.
 	// To avoid slightly expensive terrain computations we cycle these around and
 	// recompute p2 at the end of each iteration.
 
 	CVector3D p0;
 	CVector3D p1(line.m_Coords[0].X, 0, line.m_Coords[0].Y);
 	CVector3D p2(line.m_Coords[1].X, 0, line.m_Coords[1].Y);
 
 	if (closed)
 		// grab the ending point so as to close the loop
 		p0 = CVector3D(line.m_Coords[n - 1].X, 0, line.m_Coords[n - 1].Y);
 	else
 		// we don't want to loop around and use the direction towards the other end of the line, so create an artificial p0 that
 		// extends the p2 -> p1 direction, and use that point instead
 		p0 = p1 + (p1 - p2);
 
 	bool p1floating = false;
 	bool p2floating = false;
 
 	// Compute terrain heights, clamped to the water height (and remember whether
 	// each point was floating on water, for normal computation later)
 
 	// TODO: if we ever support more than one water level per map, recompute this per point
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*line.m_SimContext, SYSTEM_ENTITY);
 	float w = cmpWaterManager ? cmpWaterManager->GetExactWaterLevel(p0.X, p0.Z) : 0.f;
 
 	const CTerrain& terrain = line.m_SimContext->GetTerrain();
 
 	p0.Y = terrain.GetExactGroundLevel(p0.X, p0.Z);
 	if (p0.Y < w)
 		p0.Y = w;
 
 	p1.Y = terrain.GetExactGroundLevel(p1.X, p1.Z);
 	if (p1.Y < w)
 	{
 		p1.Y = w;
 		p1floating = true;
 	}
 
 	p2.Y = terrain.GetExactGroundLevel(p2.X, p2.Z);
 	if (p2.Y < w)
 	{
 		p2.Y = w;
 		p2floating = true;
 	}
 
 	for (size_t i = 0; i < n; ++i)
 	{
 		// For vertex i, compute bisector of lines (i-1)..(i) and (i)..(i+1)
 		// perpendicular to terrain normal
 
 		// Normal is vertical if on water, else computed from terrain
 		CVector3D norm;
 		if (p1floating)
 			norm = CVector3D(0, 1, 0);
 		else
 			norm = terrain.CalcExactNormal(p1.X, p1.Z);
 
 		CVector3D b = ((p1 - p0).Normalized() + (p2 - p1).Normalized()).Cross(norm);
 
 		// Adjust bisector length to match the line thickness, along the line's width
 		float l = b.Dot((p2 - p1).Normalized().Cross(norm));
 		if (fabs(l) > 0.000001f) // avoid unlikely divide-by-zero
 			b *= line.m_Thickness / l;
 
 		// Push vertices and indices for each quad in GL_TRIANGLES order. The two triangles of each quad are indexed using
 		// the winding orders (BR, BL, TR) and (TR, BL, TL) (where BR is bottom-right of this iteration's quad, TR top-right etc).
 		SVertex vertex1(p1 + b + norm * OverlayRenderer::OVERLAY_VOFFSET, CVector2D(0.f, v));
 		SVertex vertex2(p1 - b + norm * OverlayRenderer::OVERLAY_VOFFSET, CVector2D(1.f, v));
 		vertices.push_back(vertex1);
 		vertices.push_back(vertex2);
 
 		// NOTE(review): vertex counts and indices are narrowed to u16 here and below;
 		// presumably lines never reach 65536 vertices -- confirm.
 		u16 vertexCount = static_cast<u16>(vertices.size());
 		u16 index1 = vertexCount - 2; // index of vertex1 in this iteration (TR of this quad)
 		u16 index2 = vertexCount - 1; // index of the vertex2 in this iteration (TL of this quad)
 
 		if (i == 0)
 		{
 			// initial two vertices to continue building triangles from (n must be >= 2 for this to work)
 			indices.push_back(index1);
 			indices.push_back(index2);
 		}
 		else
 		{
 			u16 index1Prev = vertexCount - 4; // index of the vertex1 in the previous iteration (BR of this quad)
 			u16 index2Prev = vertexCount - 3; // index of the vertex2 in the previous iteration (BL of this quad)
 			ENSURE(index1Prev < vertexCount);
 			ENSURE(index2Prev < vertexCount);
 			// Add two corner points from last iteration and join with one of our own corners to create triangle 1
 			// (don't need to do this if i == 1 because i == 0 are the first two ones, they don't need to be copied)
 			if (i > 1)
 			{
 				indices.push_back(index1Prev);
 				indices.push_back(index2Prev);
 			}
 			indices.push_back(index1); // complete triangle 1
 
 			// create triangle 2, specifying the adjacent side's vertices in the opposite order from triangle 1
 			indices.push_back(index1);
 			indices.push_back(index2Prev);
 			indices.push_back(index2);
 		}
 
 		// alternate V coordinate for debugging
 		v = 1 - v;
 
 		// cycle the p's and compute the new p2
 		p0 = p1;
 		p1 = p2;
 		p1floating = p2floating;
 
 		// if in closed mode, wrap around the coordinate array for p2 -- otherwise, extend linearly
 		if (!closed && i == n-2)
 			// next iteration is the last point of the line, so create an artificial p2 that extends the p0 -> p1 direction
 			p2 = p1 + (p1 - p0);
 		else
 			p2 = CVector3D(line.m_Coords[(i + 2) % n].X, 0, line.m_Coords[(i + 2) % n].Y);
 
 		p2.Y = terrain.GetExactGroundLevel(p2.X, p2.Z);
 		if (p2.Y < w)
 		{
 			p2.Y = w;
 			p2floating = true;
 		}
 		else
 			p2floating = false;
 	}
 
 	if (closed)
 	{
 		// close the path
 		// With an even point count the alternating V coordinate of the last vertex
 		// pair differs from that of the first pair, so the first pair can be reused.
 		if (n % 2 == 0)
 		{
 			u16 vertexCount = static_cast<u16>(vertices.size());
 			indices.push_back(vertexCount - 2);
 			indices.push_back(vertexCount - 1);
 			indices.push_back(0);
 
 			indices.push_back(0);
 			indices.push_back(vertexCount - 1);
 			indices.push_back(1);
 		}
 		else
 		{
 			// add two vertices to have the good UVs for the last quad
 			SVertex vertex1(vertices[0].m_Position, CVector2D(0.f, 1.f));
 			SVertex vertex2(vertices[1].m_Position, CVector2D(1.f, 1.f));
 			vertices.push_back(vertex1);
 			vertices.push_back(vertex2);
 
 			u16 vertexCount = static_cast<u16>(vertices.size());
 			indices.push_back(vertexCount - 4);
 			indices.push_back(vertexCount - 3);
 			indices.push_back(vertexCount - 2);
 
 			indices.push_back(vertexCount - 2);
 			indices.push_back(vertexCount - 3);
 			indices.push_back(vertexCount - 1);
 		}
 	}
 	else
 	{
 		// Create start and end caps. On either end, this is done by taking the centroid between the last and second-to-last pair of
 		// vertices that was generated along the path (i.e. the vertex1's and vertex2's from above), taking a directional vector
 		// between them, and drawing the line cap in the plane given by the two butt-end corner points plus said vector.
 		std::vector<u16> capIndices;
 		std::vector<SVertex> capVertices;
 
 		// create end cap
 		CreateLineCap(
 			line,
 			// the order of these vertices is important here, swapping them produces caps at the wrong side
 			vertices[vertices.size()-2].m_Position, // top-right vertex of last quad
 			vertices[vertices.size()-1].m_Position, // top-left vertex of last quad
 			// directional vector between centroids of last vertex pair and second-to-last vertex pair
 			(Centroid(vertices[vertices.size()-2], vertices[vertices.size()-1]) - Centroid(vertices[vertices.size()-4], vertices[vertices.size()-3])).Normalized(),
 			line.m_EndCapType,
 			capVertices,
 			capIndices
 		);
 
 		// Cap indices are relative to capVertices; shift them past the vertices already present.
 		for (unsigned i = 0; i < capIndices.size(); i++)
 			capIndices[i] += static_cast<u16>(vertices.size());
 
 		vertices.insert(vertices.end(), capVertices.begin(), capVertices.end());
 		indices.insert(indices.end(), capIndices.begin(), capIndices.end());
 
 		capIndices.clear();
 		capVertices.clear();
 
 		// create start cap
 		CreateLineCap(
 			line,
 			// the order of these vertices is important here, swapping them produces caps at the wrong side
 			vertices[1].m_Position,
 			vertices[0].m_Position,
 			// directional vector between centroids of first vertex pair and second vertex pair
 			(Centroid(vertices[1], vertices[0]) - Centroid(vertices[3], vertices[2])).Normalized(),
 			line.m_StartCapType,
 			capVertices,
 			capIndices
 		);
 
 		for (unsigned i = 0; i < capIndices.size(); i++)
 			capIndices[i] += static_cast<u16>(vertices.size());
 
 		vertices.insert(vertices.end(), capVertices.begin(), capVertices.end());
 		indices.insert(indices.end(), capIndices.begin(), capIndices.end());
 	}
 
 	if (vertices.empty() || indices.empty())
 		return;
 
 	// Indices for triangles, so must be multiple of 3.
 	ENSURE(indices.size() % 3 == 0);
 
 	m_BoundingBox = CBoundingBoxAligned();
 	for (const SVertex& vertex : vertices)
 		m_BoundingBox += vertex.m_Position;
 
 	m_VB = g_VBMan.AllocateChunk(
 		sizeof(SVertex), vertices.size(), Renderer::Backend::IBuffer::Type::VERTEX, false);
 	// Allocation might fail (e.g. due to too many vertices).
 	if (m_VB)
 	{
 		// Copy data into backend buffer.
 		m_VB->m_Owner->UpdateChunkVertices(m_VB.Get(), &vertices[0]);
 
 		// Shift the indices by the chunk's m_Index; presumably that is the chunk's
 		// first vertex inside the owning buffer, which Render() binds at offset 0.
 		for (size_t k = 0; k < indices.size(); ++k)
 			indices[k] += static_cast<u16>(m_VB->m_Index);
 
 		m_VBIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), indices.size(), Renderer::Backend::IBuffer::Type::INDEX, false);
 		if (m_VBIndices)
 			m_VBIndices->m_Owner->UpdateChunkVertices(m_VBIndices.Get(), &indices[0]);
 	}
 
 }
 
 // Appends the geometry of one line cap to verticesOut / indicesOut.
 //  - line: supplies m_Thickness, used as the cap radius / extension length.
 //  - corner1, corner2: the two butt-end corner points of the line.
 //  - lineDirectionNormal: direction along which the cap extends from the corners.
 //  - endCapType: LINECAP_FLAT adds nothing; LINECAP_SHARP and LINECAP_ROUND share
 //    the triangle-fan code (SHARP uses 3 sample points and a 1.5x radius);
 //    LINECAP_SQUARE adds a three-triangle fan; any other value adds nothing.
 // The emitted indices refer to positions in verticesOut (they start at the size
 // verticesOut had on entry), so the caller has to offset them when merging the
 // cap into another vertex list.
 void CTexturedLineRData::CreateLineCap(const SOverlayTexturedLine& line, const CVector3D& corner1, const CVector3D& corner2,
 	const CVector3D& lineDirectionNormal, SOverlayTexturedLine::LineCapType endCapType, std::vector<SVertex>& verticesOut,
 	std::vector<u16>& indicesOut)
 {
 	if (endCapType == SOverlayTexturedLine::LINECAP_FLAT)
 		return; // no action needed, this is the default
 
 	// When not in closed mode, we've created artificial points for the start- and endpoints that extend the line in the
 	// direction of the first and the last segment, respectively. Thus, we know both the start and endpoints have perpendicular
 	// butt endings, i.e. the end corner vertices on either side of the line extend perpendicularly from the segment direction.
 	// That is to say, when viewed from the top, we will have something like
 	//                                                 .
 	//  this:                     and not like this:  /|
 	//         ----+                                 / |
 	//             |                                /  .
 	//             |                                  /
 	//         ----+                                 /
 	//
 
 	int roundCapPoints = 8; // amount of points to sample along the semicircle for rounded caps (including corner points)
 	float radius = line.m_Thickness;
 
 	CVector3D centerPoint = (corner1 + corner2) * 0.5f;
 	SVertex centerVertex(centerPoint, CVector2D(0.5f, 0.5f));
 	u16 indexOffset = static_cast<u16>(verticesOut.size()); // index offset in verticesOut from where we start adding our vertices
 
 	switch (endCapType)
 	{
 	case SOverlayTexturedLine::LINECAP_SHARP:
 		{
 			roundCapPoints = 3; // creates only one point directly ahead
 			radius *= 1.5f; // make it a bit sharper (note that we don't use the radius for the butt-end corner points so it should be ok)
 			centerVertex.m_UV.X = 0.480f; // slight visual correction to make the texture match up better at the corner points
 		}
 		// Deliberate: a sharp cap is generated by the round-cap code with the parameters set above.
 		FALLTHROUGH;
 	case SOverlayTexturedLine::LINECAP_ROUND:
 		{
 			// Draw a rounded line cap in the 3D plane of the line specified by the two corner points and the normal vector of the
 			// line's direction. The terrain normal at the centroid between the two corner points is perpendicular to this plane.
 			// The way this works is by taking a vector from the corner points' centroid to one of the corner points (which is then
 			// of radius length), and rotate it around the terrain normal vector in that centroid. This will rotate the vector in
 			// the line's plane, producing the desired rounded cap.
 
 			// To please OpenGL's winding order, this angle needs to be negated depending on whether we start rotating from
 			// the (center -> corner1) or (center -> corner2) vector. For the (center -> corner2) vector, we apparently need to use
 			// the negated angle.
 			float stepAngle = -(float)(M_PI/(roundCapPoints-1));
 
 			// Push the vertices in triangle fan order (easy to generate GL_TRIANGLES indices for afterwards)
 			// Note that we're manually adding the corner vertices instead of having them be generated by the rotating vector.
 			// This is because we want to support an overly large radius to make the sharp line ending look sharper.
 			verticesOut.push_back(centerVertex);
 			verticesOut.push_back(SVertex(corner2, CVector2D()));
 
 			// Get the base vector that we will incrementally rotate in the cap plane to produce the radial sample points.
 			// Normally corner2 - centerPoint would suffice for this since it is of radius length, but we want to support custom
 			// radii to support tuning the 'sharpness' of sharp end caps (see above)
 			CVector3D rotationBaseVector = (corner2 - centerPoint).Normalized() * radius;
 			// Calculate the normal vector of the plane in which we're going to be drawing the line cap. This is the vector that
 			// is perpendicular to both baseVector and the 'lineDirectionNormal' vector indicating the direction of the line.
 			// Note that we shouldn't use terrain->CalcExactNormal() here because if the line is being rendered on top of water,
 			// then CalcExactNormal will return the normal vector of the terrain that's underwater (which can be quite funky).
 			CVector3D capPlaneNormal = lineDirectionNormal.Cross(rotationBaseVector).Normalized();
 
 			// Only the interior sample points are generated here; both corners are pushed explicitly.
 			for (int i = 1; i < roundCapPoints - 1; ++i)
 			{
 				// Rotate the centerPoint -> corner vector by i*stepAngle radians around the cap plane normal at the center point.
 				CQuaternion quatRotation;
 				quatRotation.FromAxisAngle(capPlaneNormal, i * stepAngle);
 				CVector3D worldPos3D = centerPoint + quatRotation.Rotate(rotationBaseVector);
 
 				// Let v range from 0 to 1 as we move along the semi-circle, keep u fixed at 0 (i.e. curve the left vertical edge
 				// of the texture around the edge of the semicircle)
 				float u = 0.f;
 				float v = Clamp((i / static_cast<float>(roundCapPoints - 1)), 0.f, 1.f); // pos, u, v
 				verticesOut.push_back(SVertex(worldPos3D, CVector2D(u, v)));
 			}
 
 			// connect back to the other butt-end corner point to complete the semicircle
 			verticesOut.push_back(SVertex(corner1, CVector2D(0.f, 1.f)));
 
 			// now push indices in GL_TRIANGLES order; vertices[indexOffset] is the center vertex, vertices[indexOffset + 1] is the
 			// first corner point, then a bunch of radial samples, and then at the end we have the other corner point again. So:
 			for (int i=1; i < roundCapPoints; ++i)
 			{
 				indicesOut.push_back(indexOffset); // center vertex
 				indicesOut.push_back(indexOffset + i);
 				indicesOut.push_back(indexOffset + i + 1);
 			}
 		}
 		break;
 
 	case SOverlayTexturedLine::LINECAP_SQUARE:
 		{
 			// Extend the (corner1 -> corner2) vector along the direction normal and draw a square line ending consisting of
 			// three triangles (sort of like a triangle fan)
 			// NOTE: The order in which the vertices are pushed out determines the visibility, as they
 			// are rendered only one-sided; the wrong order of vertices will make the cap visible only from the bottom.
 			verticesOut.push_back(centerVertex);
 			verticesOut.push_back(SVertex(corner2, CVector2D()));
 			verticesOut.push_back(SVertex(corner2 + (lineDirectionNormal * (line.m_Thickness)), CVector2D(0.f, 0.33333f))); // extend butt corner point 2 along the normal vector
 			verticesOut.push_back(SVertex(corner1 + (lineDirectionNormal * (line.m_Thickness)), CVector2D(0.f, 0.66666f))); // extend butt corner point 1 along the normal vector
 			verticesOut.push_back(SVertex(corner1, CVector2D(0.f, 1.0f))); // push butt corner point 1
 
 			// Five vertices (center + four rim points) give three fan triangles.
 			for (int i=1; i < 4; ++i)
 			{
 				indicesOut.push_back(indexOffset); // center point
 				indicesOut.push_back(indexOffset + i);
 				indicesOut.push_back(indexOffset + i + 1);
 			}
 		}
 		break;
 
 	default:
 		break;
 	}
 
 }
 
 // Reports whether the bounding box computed by the last Update() is visible in
 // the given frustum.
 bool CTexturedLineRData::IsVisibleInFrustum(const CFrustum& frustum) const
 {
 	const bool boxVisible = frustum.IsBoxVisible(m_BoundingBox);
 	return boxVisible;
 }
Index: ps/trunk/source/renderer/WaterManager.cpp
===================================================================
--- ps/trunk/source/renderer/WaterManager.cpp	(revision 27181)
+++ ps/trunk/source/renderer/WaterManager.cpp	(revision 27182)
@@ -1,1087 +1,1087 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/ShaderProgram.h"
 #include "lib/bits.h"
 #include "lib/timer.h"
 #include "maths/MathUtil.h"
 #include "maths/Vector2D.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/backend/IDevice.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/WaterManager.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/components/ICmpRangeManager.h"
 
 #include <algorithm>
 
 // Pairs an integer index with a 2D position.
 // NOTE(review): presumably the index refers to a cell of the terrain grid --
 // confirm against the code that creates these points.
 struct CoastalPoint
 {
 	int index;
 	CVector2D position;
 
 	CoastalPoint(int idx, CVector2D pos)
 		: index(idx), position(pos)
 	{
 	}
 };
 
 // Vertex layout holding four 3D positions, a 2D vector and three UV bytes,
 // padded so that the whole structure is 64 bytes (enforced by the cassert
 // below).
 // NOTE(review): presumably the four positions are successive stages of a shore
 // wave -- confirm against the code that fills them and the shader reading them.
 struct SWavesVertex
 {
 	// vertex position
 	CVector3D m_BasePosition;
 	CVector3D m_ApexPosition;
 	CVector3D m_SplashPosition;
 	CVector3D m_RetreatPosition;
 
 	CVector2D m_PerpVect;
 	u8 m_UV[3];
 
 	// pad to a power of two
 	u8 m_Padding[5];
 };
 // Compile-time guard: changing any member above must keep the size at 64 bytes.
 cassert(sizeof(SWavesVertex) == 64);
 
 // Bundles a vertex-buffer chunk handle with an axis-aligned bounding box, a
 // width and a time value.
 // NOTE(review): the meaning and units of m_Width and m_TimeDiff are not visible
 // in this part of the file -- confirm against the code that fills and renders
 // these objects.
 struct WaveObject
 {
 	CVertexBufferManager::Handle m_VBVertices;
 	CBoundingBoxAligned m_AABB;
 	size_t m_Width;
 	float m_TimeDiff;
 };
 
 // Puts every setting into its default state. No textures or buffers are
 // created here.
 WaterManager::WaterManager()
 {
 	// State flags: rendering stays off until LoadWaterTextures() enables it.
 	m_RenderWater = false;
 	m_NeedsReloading = false;
 	m_NeedInfoUpdate = true;
 
 	// Sizes, timers and the pending update rectangle all start at zero.
 	m_MapSize = 0;
 	m_RefTextureSize = 0;
 	m_WaterTexTimer = 0.0;
 	m_updatei0 = 0;
 	m_updatej0 = 0;
 	m_updatei1 = 0;
 	m_updatej1 = 0;
 
 	// Default look of the water surface.
 	m_WaterType = L"ocean";
 	m_WaterHeight = 5.0f;
 	m_WindAngle = 0.0f;
 	m_Waviness = 8.0f;
 	m_Murkiness = 0.45f;
 	m_RepeatPeriod = 16.0f;
 	m_WaterColor = CColor(0.3f, 0.35f, 0.7f, 1.0f);
 	m_WaterTint = CColor(0.28f, 0.3f, 0.59f, 1.0f);
 
 	// Effect toggles.
 	m_WaterEffects = true;
 	m_WaterFancyEffects = false;
 	m_WaterRealDepth = false;
 	m_WaterRefraction = false;
 	m_WaterReflection = false;
 }
 
 // Drops everything the manager still holds: the water textures (through
 // UnloadWaterTextures()), the shore wave objects and their index chunk, the
 // distance heightmap and wind data, and the framebuffers together with the
 // fancy and depth textures. Framebuffers are reset before those textures.
 WaterManager::~WaterManager()
 {
 	// Cleanup if the caller messed up
 	UnloadWaterTextures();
 
 	m_ShoreWaves.clear();
 	m_ShoreWavesVBIndices.Reset();
 
 	m_DistanceHeightmap.reset();
 	m_WindStrength.reset();
 
 	m_FancyEffectsFramebuffer.reset();
 	m_RefractionFramebuffer.reset();
 	m_ReflectionFramebuffer.reset();
 
 	m_FancyTexture.reset();
 	m_FancyTextureDepth.reset();
 	m_ReflFboDepthTexture.reset();
 	m_RefrFboDepthTexture.reset();
 }
 
 
 ///////////////////////////////////////////////////////////////////
 // Progressive load of water textures
 int WaterManager::LoadWaterTextures()
 {
 	// TODO: this doesn't need to be progressive-loading any more
 	// (since texture loading is async now)
 
 	wchar_t pathname[PATH_MAX];
 
 	// Load diffuse grayscale images (for non-fancy water)
 	for (size_t i = 0; i < ARRAY_SIZE(m_WaterTexture); ++i)
 	{
 		swprintf_s(pathname, ARRAY_SIZE(pathname), L"art/textures/animated/water/default/diffuse%02d.dds", (int)i+1);
 		CTextureProperties textureProps(pathname);
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_WaterTexture[i] = texture;
 	}
 
 	m_RenderWater = true;
 
 	// Load normalmaps (for fancy water)
 	ReloadWaterNormalTextures();
 
 	// Load CoastalWaves
 	{
 		CTextureProperties textureProps(L"art/textures/terrain/types/water/coastalWave.png");
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_WaveTex = texture;
 	}
 
 	// Load Foam
 	{
 		CTextureProperties textureProps(L"art/textures/terrain/types/water/foam.png");
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_FoamTex = texture;
 	}
 
 	RecreateOrLoadTexturesIfNeeded();
 
 	return 0;
 }
 
 // Makes sure the render targets needed by the currently enabled water options
 // exist and match the current renderer size:
 //  - reflection / refraction targets are square, sized to the renderer height
 //    rounded up to a power of two, and are dropped when that size changes;
 //  - the fancy-effects target matches the renderer width and height and is
 //    dropped when either changes.
 // Targets are only created when the matching option (together with water
 // effects in general) is enabled and the texture does not exist yet. If a
 // framebuffer cannot be created, the option that required it is switched off
 // and UpdateQuality() is called.
 void WaterManager::RecreateOrLoadTexturesIfNeeded()
 {
 	Renderer::Backend::IDevice* backendDevice = g_VideoMode.GetBackendDevice();
 
 	// Use screen-sized textures for minimum artifacts.
 	const size_t newRefTextureSize = round_up_to_pow2(g_Renderer.GetHeight());
 
 	if (m_RefTextureSize != newRefTextureSize)
 	{
 		m_ReflectionFramebuffer.reset();
 		m_ReflectionTexture.reset();
 		m_ReflFboDepthTexture.reset();
 
 		m_RefractionFramebuffer.reset();
 		m_RefractionTexture.reset();
 		m_RefrFboDepthTexture.reset();
 
 		m_RefTextureSize = newRefTextureSize;
 	}
 
 	// Create reflection textures.
 	const bool needsReflectionTextures =
 		g_RenderingOptions.GetWaterEffects() &&
 		g_RenderingOptions.GetWaterReflection();
 	if (needsReflectionTextures && !m_ReflectionTexture)
 	{
 		m_ReflectionTexture = backendDevice->CreateTexture2D("WaterReflectionTexture",
 			Renderer::Backend::ITexture::Usage::SAMPLED |
 				Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT,
 			Renderer::Backend::Format::R8G8B8A8_UNORM, m_RefTextureSize, m_RefTextureSize,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::LINEAR,
 				Renderer::Backend::Sampler::AddressMode::MIRRORED_REPEAT));
 
 		m_ReflFboDepthTexture = backendDevice->CreateTexture2D("WaterReflectionDepthTexture",
 			Renderer::Backend::ITexture::Usage::SAMPLED |
 				Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT,
 			Renderer::Backend::Format::D32, m_RefTextureSize, m_RefTextureSize,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::NEAREST,
 				Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 		m_ReflectionFramebuffer = backendDevice->CreateFramebuffer("ReflectionFramebuffer",
 			m_ReflectionTexture.get(), m_ReflFboDepthTexture.get(), CColor(0.5f, 0.5f, 1.0f, 0.0f));
 		if (!m_ReflectionFramebuffer)
 		{
 			g_RenderingOptions.SetWaterReflection(false);
 			UpdateQuality();
 		}
 	}
 
 	// Create refraction textures.
 	const bool needsRefractionTextures =
 		g_RenderingOptions.GetWaterEffects() &&
 		g_RenderingOptions.GetWaterRefraction();
 	if (needsRefractionTextures && !m_RefractionTexture)
 	{
 		m_RefractionTexture = backendDevice->CreateTexture2D("WaterRefractionTexture",
 			Renderer::Backend::ITexture::Usage::SAMPLED |
 				Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT,
 			Renderer::Backend::Format::R8G8B8A8_UNORM, m_RefTextureSize, m_RefTextureSize,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::LINEAR,
 				Renderer::Backend::Sampler::AddressMode::MIRRORED_REPEAT));
 
 		m_RefrFboDepthTexture = backendDevice->CreateTexture2D("WaterRefractionDepthTexture",
 			Renderer::Backend::ITexture::Usage::SAMPLED |
 				Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT,
 			Renderer::Backend::Format::D32, m_RefTextureSize, m_RefTextureSize,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::NEAREST,
 				Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 		m_RefractionFramebuffer = backendDevice->CreateFramebuffer("RefractionFramebuffer",
 			m_RefractionTexture.get(), m_RefrFboDepthTexture.get(), CColor(1.0f, 0.0f, 0.0f, 0.0f));
 		if (!m_RefractionFramebuffer)
 		{
 			g_RenderingOptions.SetWaterRefraction(false);
 			UpdateQuality();
 		}
 	}
 
 	// The fancy-effects target follows the renderer size exactly; drop it on any change.
 	const uint32_t newWidth = static_cast<uint32_t>(g_Renderer.GetWidth());
 	const uint32_t newHeight = static_cast<uint32_t>(g_Renderer.GetHeight());
 	if (m_FancyTexture && (m_FancyTexture->GetWidth() != newWidth || m_FancyTexture->GetHeight() != newHeight))
 	{
 		m_FancyEffectsFramebuffer.reset();
 		m_FancyTexture.reset();
 		m_FancyTextureDepth.reset();
 	}
 
 	// Create the Fancy Effects textures.
 	const bool needsFancyTextures =
 		g_RenderingOptions.GetWaterEffects() &&
 		g_RenderingOptions.GetWaterFancyEffects();
 	if (needsFancyTextures && !m_FancyTexture)
 	{
 		m_FancyTexture = backendDevice->CreateTexture2D("WaterFancyTexture",
 			Renderer::Backend::ITexture::Usage::SAMPLED |
 				Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT,
 			Renderer::Backend::Format::R8G8B8A8_UNORM, g_Renderer.GetWidth(), g_Renderer.GetHeight(),
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::LINEAR,
 				Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 		m_FancyTextureDepth = backendDevice->CreateTexture2D("WaterFancyDepthTexture",
 			Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT,
 			Renderer::Backend::Format::D32, g_Renderer.GetWidth(), g_Renderer.GetHeight(),
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::LINEAR,
 				Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 		m_FancyEffectsFramebuffer = backendDevice->CreateFramebuffer("FancyEffectsFramebuffer",
 			m_FancyTexture.get(), m_FancyTextureDepth.get());
 		if (!m_FancyEffectsFramebuffer)
 		{
 			// Disable the option that actually needs this framebuffer. Turning
 			// off refraction here (as the code used to do) left fancy effects
 			// requested without a framebuffer and disabled an unrelated feature.
 			g_RenderingOptions.SetWaterFancyEffects(false);
 			UpdateQuality();
 		}
 	}
 }
 
 void WaterManager::ReloadWaterNormalTextures()
 {
 	wchar_t pathname[PATH_MAX];
 	for (size_t i = 0; i < ARRAY_SIZE(m_NormalMap); ++i)
 	{
 		swprintf_s(pathname, ARRAY_SIZE(pathname), L"art/textures/animated/water/%ls/normal00%02d.png", m_WaterType.c_str(), static_cast<int>(i) + 1);
 		CTextureProperties textureProps(pathname);
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 		textureProps.SetAnisotropicFilter(true);
 
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_NormalMap[i] = texture;
 	}
 }
 
 ///////////////////////////////////////////////////////////////////
 // Unload water textures
 // Releases the diffuse and normal map frames as well as the reflection and
 // refraction framebuffers and color textures.
 void WaterManager::UnloadWaterTextures()
 {
 	for (auto& diffuseFrame : m_WaterTexture)
 		diffuseFrame.reset();
 
 	for (auto& normalFrame : m_NormalMap)
 		normalFrame.reset();
 
 	// Framebuffers first, then the color textures.
 	m_RefractionFramebuffer.reset();
 	m_ReflectionFramebuffer.reset();
 	m_ReflectionTexture.reset();
 	m_RefractionTexture.reset();
 }
 
 // One axis pass of the shore-distance computation.
 // The grid is SideSize x SideSize, stored as [z*SideSize + x]. With
 // Transpose == false each line has a fixed z and is walked along x; with
 // Transpose == true each line has a fixed x and is walked along z.
 // For every cell, distanceMap is lowered to the smaller of
 //  - the number of steps since the last above-water cell on this line, and
 //  - the number of steps to the next above-water cell on this line,
 // and the value written never exceeds maxLevel. Entries are only ever lowered
 // (std::min with the existing value), so distanceMap has to be initialised by
 // the caller before the first pass.
 template<bool Transpose>
 static inline void ComputeDirection(float* distanceMap, const u16* heightmap, float waterHeight, size_t SideSize, size_t maxLevel)
 {
 // A cell counts as above water when its scaled height reaches the water height.
 #define ABOVEWATER(x, z) (HEIGHT_SCALE * heightmap[z*SideSize + x] >= waterHeight)
 // Advances 'lookahead' along the current line while it points at a below-water
 // cell, stopping at the end of the line or once it is more than maxLevel cells
 // ahead of id2. The loop body is the ';' supplied at the point of use.
 #define UPDATELOOKAHEAD \
 	for (; lookahead <= id2+maxLevel && lookahead < SideSize && \
 	       ((!Transpose && !ABOVEWATER(lookahead, id1)) || (Transpose && !ABOVEWATER(id1, lookahead))); ++lookahead)
 	// Algorithm:
 	// We want to know the distance to the closest shore point. Go through each line/column,
 	// keep track of when we encountered the last shore point and how far ahead the next one is.
 	for (size_t id1 = 0; id1 < SideSize; ++id1)
 	{
 		size_t id2 = 0;
 		// x and z are references to the loop counters, so they follow id1/id2 as those change.
 		const size_t& x = Transpose ? id1 : id2;
 		const size_t& z = Transpose ? id2 : id1;
 
 		// Start as if the last shore point were maxLevel away, unless the first cell is above water.
 		size_t level = ABOVEWATER(x, z) ? 0 : maxLevel;
 		size_t lookahead = (size_t)(level > 0);
 
 		UPDATELOOKAHEAD;
 
 		// start moving
 		for (; id2 < SideSize; ++id2)
 		{
 			// update current level
 			if (ABOVEWATER(x, z))
 				level = 0;
 			else
 				level = std::min(level+1, maxLevel);
 
 			// move lookahead
 			if (lookahead == id2)
 				++lookahead;
 			UPDATELOOKAHEAD;
 
 			// This is the important bit: set the distance to either:
 			// - the distance to the previous shore point (level)
 			// - the distance to the next shore point (lookahead-id2)
 			distanceMap[z*SideSize + x] = std::min(distanceMap[z*SideSize + x], (float)std::min(lookahead-id2, level));
 		}
 	}
 #undef ABOVEWATER
 #undef UPDATELOOKAHEAD
 }
 
 ///////////////////////////////////////////////////////////////////
 // Recompute the distance-to-shore map from the terrain heightmap.
 void WaterManager::RecomputeDistanceHeightmap()
 {
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	if (!terrain || !terrain->GetHeightMap())
 		return;
 
 	size_t SideSize = m_MapSize;
 
 	// we want to look ahead some distance, but not too much (less efficient and not interesting). This is our lookahead.
 	const size_t maxLevel = 5;
 
 	if (!m_DistanceHeightmap)
 	{
 		m_DistanceHeightmap = std::make_unique<float[]>(SideSize * SideSize);
 		std::fill(m_DistanceHeightmap.get(), m_DistanceHeightmap.get() + SideSize * SideSize, static_cast<float>(maxLevel));
 	}
 
 	// Create a manhattan-distance heightmap.
 	// This could be refined to only be done near the coast itself, but it's probably not necessary.
 
 	u16* heightmap = terrain->GetHeightMap();
 
 	ComputeDirection<false>(m_DistanceHeightmap.get(), heightmap, m_WaterHeight, SideSize, maxLevel);
 	ComputeDirection<true>(m_DistanceHeightmap.get(), heightmap, m_WaterHeight, SideSize, maxLevel);
 }
 
 // Rebuilds m_ShoreWaves (and the shared index buffer m_ShoreWavesVBIndices).
 // This requires m_DistanceHeightmap to be defined properly.
 //
 // Steps:
 //  1. collect the cells whose distance-to-shore value is about 1;
 //  2. link neighbouring cells into chains;
 //  3. smooth the chain positions;
 //  4. cut the chains into waves of up to waveSizes columns, 9 vertices per
 //     column, and upload one vertex chunk per wave.
 void WaterManager::CreateWaveMeshes()
 {
 	if (m_MapSize == 0)
 		return;
 
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	if (!terrain || !terrain->GetHeightMap())
 		return;
 
 	// Existing waves are always discarded, even if none are generated below.
 	m_ShoreWaves.clear();
 	m_ShoreWavesVBIndices.Reset();
 
 	// Waves are only generated for wavy enough water or for the "ocean" water type.
 	if (m_Waviness < 5.0f && m_WaterType != L"ocean")
 		return;
 
 	size_t SideSize = m_MapSize;
 
 	// First step: get the points near the coast.
 	// Points are stored as flattened indices (z*SideSize + x); the map border is excluded.
 	std::set<int> CoastalPointsSet;
 	for (size_t z = 1; z < SideSize-1; ++z)
 		for (size_t x = 1; x < SideSize-1; ++x)
 			// get the points not on the shore but near it, ocean-side
 			if (m_DistanceHeightmap[z*m_MapSize + x] > 0.5f && m_DistanceHeightmap[z*m_MapSize + x] < 1.5f)
 				CoastalPointsSet.insert((z)*SideSize + x);
 
 	// Second step: create chains out of those coastal points.
 	// Offsets of the 8 neighbours of a cell, as { dx, dy }.
 	static const int around[8][2] = { { -1,-1 }, { -1,0 }, { -1,1 }, { 0,1 }, { 1,1 }, { 1,0 }, { 1,-1 }, { 0,-1 } };
 
 	std::vector<std::deque<CoastalPoint> > CoastalPointsChains;
 	while (!CoastalPointsSet.empty())
 	{
 		int index = *(CoastalPointsSet.begin());
 		int x = index % SideSize;
 		int y = (index - x ) / SideSize;
 
 		std::deque<CoastalPoint> Chain;
 
 		// Chain positions are the cell coordinates multiplied by 4.
 		Chain.push_front(CoastalPoint(index,CVector2D(x*4,y*4)));
 
 		// Erase us.
 		CoastalPointsSet.erase(CoastalPointsSet.begin());
 
 		// We're our starter points. At most we can have 2 points close to us.
 		// We'll pick the first one and look for its neighbors (he can only have one new)
 		// Up until we either reach the end of the chain, or ourselves.
 		// Then go down the other direction if there is any.
 		int neighbours[2] = { -1, -1 };
 		int nbNeighb = 0;
 		for (int i = 0; i < 8; ++i)
 		{
 			if (CoastalPointsSet.count(x + around[i][0] + (y + around[i][1])*SideSize))
 			{
 				if (nbNeighb < 2)
 					neighbours[nbNeighb] = x + around[i][0] + (y + around[i][1])*SideSize;
 				++nbNeighb;
 			}
 		}
 		// More than two neighbours: the start point (already erased above) is dropped
 		// without starting a chain.
 		if (nbNeighb > 2)
 			continue;
 
 		// i == 0 grows the chain at the back, i == 1 grows it at the front.
 		for (int i = 0; i < 2; ++i)
 		{
 			if (neighbours[i] == -1)
 				continue;
 			// Move to our neighboring point
 			int xx = neighbours[i] % SideSize;
 			int yy = (neighbours[i] - xx ) / SideSize;
 			int indexx = xx + yy*SideSize;
 			int endedChain = false;
 
 			if (i == 0)
 				Chain.push_back(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 			else
 				Chain.push_front(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 
 			// If there's a loop we'll be the "other" neighboring point already so check for that.
 			// We'll readd at the end/front the other one to have full squares.
 			if (CoastalPointsSet.count(indexx) == 0)
 				break;
 
 			CoastalPointsSet.erase(indexx);
 
 			// Start checking from there.
 			while(!endedChain)
 			{
 				bool found = false;
 				nbNeighb = 0;
 				for (int p = 0; p < 8; ++p)
 				{
 					if (CoastalPointsSet.count(xx+around[p][0] + (yy + around[p][1])*SideSize))
 					{
 						// NOTE(review): nbNeighb is reset to 0 above and the loop breaks on the
 						// first hit, so this branch looks unreachable - confirm.
 						if (nbNeighb >= 2)
 						{
 							CoastalPointsSet.erase(xx + yy*SideSize);
 							continue;
 						}
 						++nbNeighb;
 						// We've found a new point around us.
 						// Move there
 						xx = xx + around[p][0];
 						yy = yy + around[p][1];
 						indexx = xx + yy*SideSize;
 						if (i == 0)
 							Chain.push_back(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 						else
 							Chain.push_front(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 						CoastalPointsSet.erase(xx + yy*SideSize);
 						found = true;
 						break;
 					}
 				}
 				if (!found)
 					endedChain = true;
 			}
 		}
 		// Chains of 10 points or fewer are discarded.
 		if (Chain.size() > 10)
 			CoastalPointsChains.push_back(Chain);
 	}
 
 	// (optional) third step: Smooth chains out.
 	// This is also really dumb.
 	for (size_t i = 0; i < CoastalPointsChains.size(); ++i)
 	{
 		// Bump 1 for smoother.
 		for (int p = 0; p < 3; ++p)
 		{
 			// Each interior point moves halfway towards the midpoint of its two neighbours.
 			for (size_t j = 1; j < CoastalPointsChains[i].size()-1; ++j)
 			{
 				CVector2D realPos = CoastalPointsChains[i][j-1].position + CoastalPointsChains[i][j+1].position;
 
 				CoastalPointsChains[i][j].position = (CoastalPointsChains[i][j].position + realPos/2.0f)/2.0f;
 			}
 		}
 	}
 
 	// Fourth step: create waves themselves, using those chains. We basically create subchains.
 	u16 waveSizes = 14;	// maximal size in width.
 
 	// Construct indices buffer (we can afford one for all of them)
 	// Each column has 9 vertices; adjacent columns are joined by 7 quads of two
 	// triangles each, i.e. 7 * 6 indices per column pair.
 	// NOTE(review): only vertices 0..7 of each column are referenced here - confirm
 	// that leaving the 9th unused is intended.
 	std::vector<u16> water_indices;
 	for (u16 a = 0; a < waveSizes - 1; ++a)
 	{
 		for (u16 rect = 0; rect < 7; ++rect)
 		{
 			water_indices.push_back(a * 9 + rect);
 			water_indices.push_back(a * 9 + 9 + rect);
 			water_indices.push_back(a * 9 + 1 + rect);
 			water_indices.push_back(a * 9 + 9 + rect);
 			water_indices.push_back(a * 9 + 10 + rect);
 			water_indices.push_back(a * 9 + 1 + rect);
 		}
 	}
 	// Generic indexes, max-length
 	m_ShoreWavesVBIndices = g_VBMan.AllocateChunk(
 		sizeof(u16), water_indices.size(),
 		Renderer::Backend::IBuffer::Type::INDEX, false,
 		nullptr, CVertexBufferManager::Group::WATER);
 	m_ShoreWavesVBIndices->m_Owner->UpdateChunkVertices(m_ShoreWavesVBIndices.Get(), &water_indices[0]);
 
 	// Time offset stored in each wave (m_TimeDiff); it grows from one wave to the next.
 	float diff = (rand() % 50) / 5.0f;
 
 	std::vector<SWavesVertex> vertices, reversed;
 	for (size_t i = 0; i < CoastalPointsChains.size(); ++i)
 	{
 		// NOTE: size()-waveSizes is unsigned and wraps around for chains shorter than
 		// waveSizes; the check at the top of the body then breaks out immediately.
 		for (size_t j = 0; j < CoastalPointsChains[i].size()-waveSizes; ++j)
 		{
 			if (CoastalPointsChains[i].size()- 1 - j < waveSizes)
 				break;
 
 			u16 width = waveSizes;
 
 			// First pass to get some parameters out.
 			float outmost = 0.0f;	// how far to move on the shore.
 			float avgDepth = 0.0f;
 			int sign = 1;
 			CVector2D firstPerp(0,0), perp(0,0), lastPerp(0,0);
 			for (u16 a = 0; a < waveSizes;++a)
 			{
 				lastPerp = perp;
 				perp = CVector2D(0,0);
 				int nb = 0;
 				CVector2D pos = CoastalPointsChains[i][j+a].position;
 				CVector2D posPlus;
 				CVector2D posMinus;
 				if (a > 0)
 				{
 					++nb;
 					posMinus = CoastalPointsChains[i][j+a-1].position;
 					perp += pos-posMinus;
 				}
 				if (a < waveSizes-1)
 				{
 					++nb;
 					posPlus = CoastalPointsChains[i][j+a+1].position;
 					perp += posPlus-pos;
 				}
 				// Average the chain direction around this point, then rotate it by 90 degrees.
 				perp /= nb;
 				perp = CVector2D(-perp.Y,perp.X).Normalized();
 
 				if (a == 0)
 					firstPerp = perp;
 
 				// Cut the wave short where the chain bends too sharply.
 				if ( a > 1 && perp.Dot(lastPerp) < 0.90f && perp.Dot(firstPerp) < 0.70f)
 				{
 					width = a+1;
 					break;
 				}
 
 				// Flip the direction if stepping along perp ends up above the water level.
 				if (terrain->GetExactGroundLevel(pos.X+perp.X*1.5f, pos.Y+perp.Y*1.5f) > m_WaterHeight)
 					sign = -1;
 
 				avgDepth += terrain->GetExactGroundLevel(pos.X+sign*perp.X*20.0f, pos.Y+sign*perp.Y*20.0f) - m_WaterHeight;
 
 				// Walk from -2.0 towards 0 in steps of 0.2 until the ground is between 0 and 0.6 above the water.
 				float localOutmost = -2.0f;
 				while (localOutmost < 0.0f)
 				{
 					float depth = terrain->GetExactGroundLevel(pos.X+sign*perp.X*localOutmost, pos.Y+sign*perp.Y*localOutmost) - m_WaterHeight;
 					if (depth < 0.0f || depth > 0.6f)
 						localOutmost += 0.2f;
 					else
 						break;
 				}
 
 				outmost += localOutmost;
 			}
 			// Too narrow to be worth a wave: skip ahead along the chain.
 			if (width < 5)
 			{
 				j += 6;
 				continue;
 			}
 
 			outmost /= width;
 
 			if (outmost > -0.5f)
 			{
 				j += 3;
 				continue;
 			}
 			outmost = -2.5f + outmost * m_Waviness/10.0f;
 
 			avgDepth /= width;
 
 			// Skip this spot unless the averaged sample is more than 1.3 below the water level.
 			if (avgDepth > -1.3f)
 			{
 				j += 3;
 				continue;
 			}
 			// we passed the checks, we can create a wave of size "width".
 
 			std::unique_ptr<WaveObject> shoreWave = std::make_unique<WaveObject>();
 			vertices.clear();
 			vertices.reserve(9 * width);
 
 			shoreWave->m_Width = width;
 			shoreWave->m_TimeDiff = diff;
 			diff += (rand() % 100) / 25.0f + 4.0f;
 
 			// Second pass: build the 9 vertices of every column of the wave.
 			for (u16 a = 0; a < width;++a)
 			{
 				perp = CVector2D(0,0);
 				int nb = 0;
 				CVector2D pos = CoastalPointsChains[i][j+a].position;
 				CVector2D posPlus;
 				CVector2D posMinus;
 				if (a > 0)
 				{
 					++nb;
 					posMinus = CoastalPointsChains[i][j+a-1].position;
 					perp += pos-posMinus;
 				}
 				if (a < waveSizes-1)
 				{
 					++nb;
 					posPlus = CoastalPointsChains[i][j+a+1].position;
 					perp += posPlus-pos;
 				}
 				perp /= nb;
 				perp = CVector2D(-perp.Y,perp.X).Normalized();
 
 				SWavesVertex point[9];
 
 				float baseHeight = 0.04f;
 
 				// sideNess is 0 at both ends of the wave and reaches 1 three columns in.
 				float halfWidth = (width-1.0f)/2.0f;
 				float sideNess = sqrtf(Clamp( (halfWidth - fabsf(a - halfWidth)) / 3.0f, 0.0f, 1.0f));
 
 				point[0].m_UV[0] = a; point[0].m_UV[1] = 8;
 				point[1].m_UV[0] = a; point[1].m_UV[1] = 7;
 				point[2].m_UV[0] = a; point[2].m_UV[1] = 6;
 				point[3].m_UV[0] = a; point[3].m_UV[1] = 5;
 				point[4].m_UV[0] = a; point[4].m_UV[1] = 4;
 				point[5].m_UV[0] = a; point[5].m_UV[1] = 3;
 				point[6].m_UV[0] = a; point[6].m_UV[1] = 2;
 				point[7].m_UV[0] = a; point[7].m_UV[1] = 1;
 				point[8].m_UV[0] = a; point[8].m_UV[1] = 0;
 
 				point[0].m_PerpVect = perp;
 				point[1].m_PerpVect = perp;
 				point[2].m_PerpVect = perp;
 				point[3].m_PerpVect = perp;
 				point[4].m_PerpVect = perp;
 				point[5].m_PerpVect = perp;
 				point[6].m_PerpVect = perp;
 				point[7].m_PerpVect = perp;
 				point[8].m_PerpVect = perp;
 
 				// Per-vertex offsets along perp for the base, apex, splash and retreat positions.
 				static const float perpT1[9] = { 6.0f, 6.05f, 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 9.7f };
 				static const float perpT2[9] = { 2.0f, 2.1f,  2.2f, 2.3f, 2.4f, 3.0f, 3.3f, 3.6f, 9.5f };
 				static const float perpT3[9] = { 1.1f, 0.7f, -0.2f, 0.0f, 0.6f, 1.3f, 2.2f, 3.6f, 9.0f };
 				static const float perpT4[9] = { 2.0f, 2.1f,  1.2f, 1.5f, 1.7f, 1.9f, 2.7f, 3.8f, 9.0f };
 
 				// Per-vertex height offsets, scaled by sideNess when applied.
 				static const float heightT1[9] = { 0.0f, 0.2f, 0.5f, 0.8f, 0.9f, 0.85f, 0.6f, 0.2f, 0.0 };
 				static const float heightT2[9] = { -0.8f, -0.4f, 0.0f, 0.1f, 0.1f, 0.03f, 0.0f, 0.0f, 0.0 };
 				static const float heightT3[9] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0 };
 
 				// Every position sits at the higher of the water level and the ground (+0.05),
 				// plus baseHeight and the scaled height offset.
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT1[t]+outmost),
 																			pos.Y+sign*perp.Y*(perpT1[t]+outmost));
 					point[t].m_BasePosition = CVector3D(pos.X+sign*perp.X*(perpT1[t]+outmost), baseHeight + heightT1[t]*sideNess + std::max(m_WaterHeight,terrHeight),
 														pos.Y+sign*perp.Y*(perpT1[t]+outmost));
 				}
 				// NOTE(review): the apex positions reuse heightT1 rather than a table of their own - confirm intended.
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT2[t]+outmost),
 																			pos.Y+sign*perp.Y*(perpT2[t]+outmost));
 					point[t].m_ApexPosition = CVector3D(pos.X+sign*perp.X*(perpT2[t]+outmost), baseHeight + heightT1[t]*sideNess + std::max(m_WaterHeight,terrHeight),
 														pos.Y+sign*perp.Y*(perpT2[t]+outmost));
 				}
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT3[t]+outmost*sideNess),
 																			pos.Y+sign*perp.Y*(perpT3[t]+outmost*sideNess));
 					point[t].m_SplashPosition = CVector3D(pos.X+sign*perp.X*(perpT3[t]+outmost*sideNess), baseHeight + heightT2[t]*sideNess + std::max(m_WaterHeight,terrHeight), pos.Y+sign*perp.Y*(perpT3[t]+outmost*sideNess));
 				}
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT4[t]+outmost),
 																			pos.Y+sign*perp.Y*(perpT4[t]+outmost));
 					point[t].m_RetreatPosition = CVector3D(pos.X+sign*perp.X*(perpT4[t]+outmost), baseHeight + heightT3[t]*sideNess + std::max(m_WaterHeight,terrHeight),
 														   pos.Y+sign*perp.Y*(perpT4[t]+outmost));
 				}
 
 				// Vertices are stored in reverse order within a column (point[8] first).
 				vertices.push_back(point[8]);
 				vertices.push_back(point[7]);
 				vertices.push_back(point[6]);
 				vertices.push_back(point[5]);
 				vertices.push_back(point[4]);
 				vertices.push_back(point[3]);
 				vertices.push_back(point[2]);
 				vertices.push_back(point[1]);
 				vertices.push_back(point[0]);
 
 				// Grow the bounding box from a handful of representative positions.
 				shoreWave->m_AABB += point[8].m_SplashPosition;
 				shoreWave->m_AABB += point[8].m_BasePosition;
 				shoreWave->m_AABB += point[0].m_SplashPosition;
 				shoreWave->m_AABB += point[0].m_BasePosition;
 				shoreWave->m_AABB += point[4].m_ApexPosition;
 			}
 
 			if (sign == 1)
 			{
 				// Let's do some fancy reversing.
 				// The column order is reversed; the order within each column is kept.
 				reversed.clear();
 				reversed.reserve(vertices.size());
 				for (int a = width - 1; a >= 0; --a)
 				{
 					for (size_t t = 0; t < 9; ++t)
 						reversed.push_back(vertices[a * 9 + t]);
 				}
 				std::swap(vertices, reversed);
 			}
 			// Together with the loop's ++j this moves on by half a wave width.
 			j += width/2-1;
 
 			shoreWave->m_VBVertices = g_VBMan.AllocateChunk(
 				sizeof(SWavesVertex), vertices.size(),
 				Renderer::Backend::IBuffer::Type::VERTEX, false,
 				nullptr, CVertexBufferManager::Group::WATER);
 			shoreWave->m_VBVertices->m_Owner->UpdateChunkVertices(shoreWave->m_VBVertices.Get(), &vertices[0]);
 
 			m_ShoreWaves.emplace_back(std::move(shoreWave));
 		}
 	}
 }
 
 // Draws every shore wave whose bounding box is visible in the given frustum
 // into m_FancyEffectsFramebuffer, using the water_waves effect.
 // Does nothing when m_WaterFancyEffects is off.
 void WaterManager::RenderWaves(
 	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
 	const CFrustum& frustrum)
 {
 	if (!m_WaterFancyEffects)
 		return;
 
+	m_WaveTex->UploadBackendTextureIfNeeded(deviceCommandContext);
+	m_FoamTex->UploadBackendTextureIfNeeded(deviceCommandContext);
+
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render Waves");
 
 	// Start from the default pipeline state for the clear; the effect's own state is applied below.
 	deviceCommandContext->SetGraphicsPipelineState(
 		Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc());
 	deviceCommandContext->BeginFramebufferPass(m_FancyEffectsFramebuffer.get());
 	deviceCommandContext->ClearFramebuffer();
 
 	CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(str_water_waves);
 	deviceCommandContext->SetGraphicsPipelineState(
 		tech->GetGraphicsPipelineStateDesc());
 	deviceCommandContext->BeginPass();
 	Renderer::Backend::IShaderProgram* shader = tech->GetShader();
 
-	m_WaveTex->UploadBackendTextureIfNeeded(deviceCommandContext);
-	m_FoamTex->UploadBackendTextureIfNeeded(deviceCommandContext);
-
 	deviceCommandContext->SetTexture(
 		shader->GetBindingSlot(str_waveTex), m_WaveTex->GetBackendTexture());
 	deviceCommandContext->SetTexture(
 		shader->GetBindingSlot(str_foamTex), m_FoamTex->GetBackendTexture());
 
 	// Uniforms shared by all waves: the animation time and the view-projection matrix.
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_time), static_cast<float>(m_WaterTexTimer));
 	const CMatrix3D transform =
 		g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection();
 	deviceCommandContext->SetUniform(
 		shader->GetBindingSlot(str_transform), transform.AsFloatArray());
 
 	for (size_t a = 0; a < m_ShoreWaves.size(); ++a)
 	{
 		// Skip waves whose bounding box is outside the frustum.
 		if (!frustrum.IsBoxVisible(m_ShoreWaves[a]->m_AABB))
 			continue;
 
 		CVertexBuffer::VBChunk* VBchunk = m_ShoreWaves[a]->m_VBVertices.Get();
-		VBchunk->m_Owner->UploadIfNeeded(deviceCommandContext);
-		m_ShoreWavesVBIndices->m_Owner->UploadIfNeeded(deviceCommandContext);
+		ENSURE(!VBchunk->m_Owner->GetBuffer()->IsDynamic());
+		ENSURE(!m_ShoreWavesVBIndices->m_Owner->GetBuffer()->IsDynamic());
 
 		const uint32_t stride = sizeof(SWavesVertex);
 		const uint32_t firstVertexOffset = VBchunk->m_Index * stride;
 
 		// Base position, perpendicular vector and UV go to POSITION, NORMAL and UV0.
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::POSITION,
 			Renderer::Backend::Format::R32G32B32_SFLOAT,
 			offsetof(SWavesVertex, m_BasePosition), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::NORMAL,
 			Renderer::Backend::Format::R32G32_SFLOAT,
 			offsetof(SWavesVertex, m_PerpVect), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV0,
 			Renderer::Backend::Format::R8G8_UINT,
 			offsetof(SWavesVertex, m_UV), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 		// The apex, splash and retreat positions are passed through UV1..UV3.
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV1,
 			Renderer::Backend::Format::R32G32B32_SFLOAT,
 			offsetof(SWavesVertex, m_ApexPosition), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV2,
 			Renderer::Backend::Format::R32G32B32_SFLOAT,
 			offsetof(SWavesVertex, m_SplashPosition), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 		deviceCommandContext->SetVertexAttributeFormat(
 			Renderer::Backend::VertexAttributeStream::UV3,
 			Renderer::Backend::Format::R32G32B32_SFLOAT,
 			offsetof(SWavesVertex, m_RetreatPosition), stride,
 			Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0);
 
 		// Per-wave uniforms: its time offset and its width in columns.
 		deviceCommandContext->SetUniform(
 			shader->GetBindingSlot(str_translation), m_ShoreWaves[a]->m_TimeDiff);
 		deviceCommandContext->SetUniform(
 			shader->GetBindingSlot(str_width), static_cast<float>(m_ShoreWaves[a]->m_Width));
 
 		deviceCommandContext->SetVertexBuffer(
 			0, VBchunk->m_Owner->GetBuffer(), firstVertexOffset);
 		deviceCommandContext->SetIndexBuffer(m_ShoreWavesVBIndices->m_Owner->GetBuffer());
 
 		// 7 quads (6 indices each) join every pair of adjacent columns.
 		const uint32_t indexCount = (m_ShoreWaves[a]->m_Width - 1) * (7 * 6);
 		deviceCommandContext->DrawIndexed(m_ShoreWavesVBIndices->m_Index, indexCount, 0);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		g_Renderer.GetStats().m_WaterTris += indexCount / 3;
 	}
 	deviceCommandContext->EndPass();
 	deviceCommandContext->EndFramebufferPass();
 }
 
 // Refreshes all derived water data, in order: the distance-to-shore map,
 // the wind strength map, then the shore wave meshes.
 void WaterManager::RecomputeWaterData()
 {
 	// Nothing can be computed before a map size has been set.
 	if (m_MapSize == 0)
 		return;
 
 	RecomputeDistanceHeightmap();
 	RecomputeWindStrength();
 	CreateWaveMeshes();
 }
 
 ///////////////////////////////////////////////////////////////////
 // Calculate the strength of the wind at a given point on the map.
 // Fills m_WindStrength (m_MapSize * m_MapSize floats, indexed [Y * m_MapSize + X]).
 // A set of starting points on one or two map edges is stepped across the map in
 // the direction (windX, windY); the strength carried by each point is adjusted
 // at every step and written to the cell it lands on.
 void WaterManager::RecomputeWindStrength()
 {
 	// NOTE(review): m_MapSize is presumably unsigned (it is cast to ssize_t below),
 	// in which case this is an == 0 test - confirm.
 	if (m_MapSize <= 0)
 		return;
 
 	// The buffer is allocated before the terrain check, so it exists even on early return.
 	if (!m_WindStrength)
 		m_WindStrength = std::make_unique<float[]>(m_MapSize * m_MapSize);
 
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	if (!terrain || !terrain->GetHeightMap())
 		return;
 
 	CVector2D windDir = CVector2D(cos(m_WindAngle), sin(m_WindAngle));
 
 	// (windX, windY) is the negated wind direction scaled to stepSize cells and rounded.
 	int stepSize = 10;
 	ssize_t windX = -round(stepSize * windDir.X);
 	ssize_t windY = -round(stepSize * windDir.Y);
 
 	// A travelling sample: its current cell and the strength it carries.
 	struct SWindPoint {
 		SWindPoint(size_t x, size_t y, float strength) : X(x), Y(y), windStrength(strength) {}
 		ssize_t X;
 		ssize_t Y;
 		float windStrength;
 	};
 
 	std::vector<SWindPoint> startingPoints;
 	std::vector<std::pair<int, int>> movement; // Every increment, move each starting point by all of these.
 
 	// Compute starting points (one or two edges of the map) and how much to move each computation increment.
 	if (fabs(windDir.X) < 0.01f)
 	{
 		// Wind (almost) purely along Y: start from one whole row and step one cell in Y.
 		movement.emplace_back(0, windY > 0.f ? 1 : -1);
 		startingPoints.reserve(m_MapSize);
 		size_t start = windY > 0 ? 0 : m_MapSize - 1;
 		for (size_t x = 0; x < m_MapSize; ++x)
 			startingPoints.emplace_back(x, start, 0.f);
 	}
 	else if (fabs(windDir.Y) < 0.01f)
 	{
 		// Wind (almost) purely along X: start from one whole column and step one cell in X.
 		movement.emplace_back(windX > 0.f ? 1 : - 1, 0);
 		startingPoints.reserve(m_MapSize);
 		size_t start = windX > 0 ? 0 : m_MapSize - 1;
 		for (size_t z = 0; z < m_MapSize; ++z)
 			startingPoints.emplace_back(start, z, 0.f);
 	}
 	else
 	{
 		startingPoints.reserve(m_MapSize * 2);
 		// Points along X.
 		size_t start = windY > 0 ? 0 : m_MapSize - 1;
 		for (size_t x = 0; x < m_MapSize; ++x)
 			startingPoints.emplace_back(x, start, 0.f);
 		// Points along Z, avoid repeating the corner point.
 		start = windX > 0 ? 0 : m_MapSize - 1;
 		if (windY > 0)
 			for (size_t z = 1; z < m_MapSize; ++z)
 				startingPoints.emplace_back(start, z, 0.f);
 		else
 			for (size_t z = 0; z < m_MapSize-1; ++z)
 				startingPoints.emplace_back(start, z, 0.f);
 
 		// Compute movement array.
 		// (windX, windY) is broken into unit steps (diagonal while both components
 		// remain), which consumes windX and windY down to zero.
 		movement.reserve(std::max(std::abs(windX),std::abs(windY)));
 		while (windX != 0 || windY != 0)
 		{
 			std::pair<ssize_t, ssize_t> move = {
 				windX == 0 ? 0 : windX > 0 ? +1 : -1,
 				windY == 0 ? 0 : windY > 0 ? +1 : -1
 			};
 			windX -= move.first;
 			windY -= move.second;
 			movement.push_back(move);
 		}
 	}
 
 	// We have all starting points ready, move them all until the map is covered.
 	for (SWindPoint& point : startingPoints)
 	{
 		// Starting velocity is 1.0 unless in shallow water.
 		m_WindStrength[point.Y * m_MapSize + point.X] = 1.f;
 		float depth = m_WaterHeight - terrain->GetVertexGroundLevel(point.X, point.Y);
 		if (depth > 0.f && depth < 2.f)
 			m_WindStrength[point.Y * m_MapSize + point.X] = depth / 2.f;
 		point.windStrength = m_WindStrength[point.Y * m_MapSize + point.X];
 
 		// Replay the movement pattern until the point steps off the map.
 		bool onMap = true;
 		while (onMap)
 			for (size_t step = 0; step < movement.size(); ++step)
 			{
 				// Move wind speed towards the mean.
 				point.windStrength = 0.15f + point.windStrength * 0.85f;
 
 				// Adjust speed based on height difference, a positive height difference slowly increases speed (simulate venturi effect)
 				// and a lower height reduces speed (wind protection from hills/...)
 				// NOTE(review): the destination cell is sampled before the bounds check below,
 				// so GetVertexGroundLevel may be called with off-map coordinates - confirm it tolerates that.
 				float heightDiff = std::max(m_WaterHeight, terrain->GetVertexGroundLevel(point.X + movement[step].first, point.Y + movement[step].second)) -
 					std::max(m_WaterHeight, terrain->GetVertexGroundLevel(point.X, point.Y));
 				if (heightDiff > 0.f)
 					point.windStrength = std::min(2.f, point.windStrength + std::min(4.f, heightDiff) / 40.f);
 				else
 					point.windStrength = std::max(0.f, point.windStrength + std::max(-4.f, heightDiff) / 5.f);
 
 				point.X += movement[step].first;
 				point.Y += movement[step].second;
 
 				if (point.X < 0 || point.X >= static_cast<ssize_t>(m_MapSize) || point.Y < 0 || point.Y >= static_cast<ssize_t>(m_MapSize))
 				{
 					onMap = false;
 					break;
 				}
 				m_WindStrength[point.Y * m_MapSize + point.X] = point.windStrength;
 			}
 	}
 	// TODO: should perhaps blur a little, or change the above code to incorporate neighboring tiles a bit.
 }
 
 ////////////////////////////////////////////////////////////////////////
 // TODO: This will always recalculate for now
 void WaterManager::SetMapSize(size_t size)
 {
 	// TODO: The size is taken on trust; no validation is performed.
 	m_MapSize = size;
 
 	// Reset the update bounds to span 0..size on both axes and request an info update.
 	m_updatei0 = 0;
 	m_updatej0 = 0;
 	m_updatei1 = size;
 	m_updatej1 = size;
 	m_NeedInfoUpdate = true;
 
 	// Drop the per-map buffers; they are reallocated when next recomputed.
 	m_DistanceHeightmap.reset();
 	m_WindStrength.reset();
 }
 
 ////////////////////////////////////////////////////////////////////////
 // This will set the bools properly
 void WaterManager::UpdateQuality()
 {
 	if (g_RenderingOptions.GetWaterEffects() != m_WaterEffects)
 	{
 		m_WaterEffects = g_RenderingOptions.GetWaterEffects();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterFancyEffects() != m_WaterFancyEffects)
 	{
 		m_WaterFancyEffects = g_RenderingOptions.GetWaterFancyEffects();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterRealDepth() != m_WaterRealDepth)
 	{
 		m_WaterRealDepth = g_RenderingOptions.GetWaterRealDepth();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterRefraction() != m_WaterRefraction)
 	{
 		m_WaterRefraction = g_RenderingOptions.GetWaterRefraction();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterReflection() != m_WaterReflection)
 	{
 		m_WaterReflection = g_RenderingOptions.GetWaterReflection();
 		m_NeedsReloading = true;
 	}
 }
 
 // Fancy water needs water rendering enabled, a backend other than GL_ARB,
 // and the water effects option switched on.
 bool WaterManager::WillRenderFancyWater() const
 {
 	if (!m_RenderWater)
 		return false;
 
 	if (g_VideoMode.GetBackendDevice()->GetBackend() == Renderer::Backend::Backend::GL_ARB)
 		return false;
 
 	return g_RenderingOptions.GetWaterEffects();
 }
 
 // Maps the water timer onto a frame of m_WaterTexture: the whole array is
 // played once per `period`, wrapping around. `period` must be positive.
 size_t WaterManager::GetCurrentTextureIndex(const double& period) const
 {
 	ENSURE(period > 0.0);
 
 	const size_t frameCount = ARRAY_SIZE(m_WaterTexture);
 	const size_t elapsedFrames = static_cast<size_t>(m_WaterTexTimer * frameCount / period);
 	return elapsedFrames % frameCount;
 }
 
 // Index of the frame following the current one, wrapping to 0 after the last.
 size_t WaterManager::GetNextTextureIndex(const double& period) const
 {
 	ENSURE(period > 0.0);
 
 	const size_t currentIndex = GetCurrentTextureIndex(period);
 	return (currentIndex + 1) % ARRAY_SIZE(m_WaterTexture);
 }
Index: ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp
===================================================================
--- ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp	(revision 27181)
+++ ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp	(revision 27182)
@@ -1,1209 +1,1213 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "DeviceCommandContext.h"
 
 #include "ps/CLogger.h"
 #include "renderer/backend/gl/Buffer.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/backend/gl/Framebuffer.h"
 #include "renderer/backend/gl/Mapping.h"
 #include "renderer/backend/gl/ShaderProgram.h"
 #include "renderer/backend/gl/Texture.h"
 
 #include <algorithm>
 #include <cstring>
 #include <limits>
 
 namespace Renderer
 {
 
 namespace Backend
 {
 
 namespace GL
 {
 
 namespace
 {
 
 // Stencil op states are equal when all four of their fields match.
 bool operator==(const StencilOpState& lhs, const StencilOpState& rhs)
 {
 	if (lhs.failOp != rhs.failOp || lhs.passOp != rhs.passOp)
 		return false;
 	return lhs.depthFailOp == rhs.depthFailOp && lhs.compareOp == rhs.compareOp;
 }
 // Negation of the equality operator for stencil op states.
 bool operator!=(const StencilOpState& lhs, const StencilOpState& rhs)
 {
 	const bool same = operator==(lhs, rhs);
 	return !same;
 }
 
 // Rects are equal when both their origin and their extent match.
 bool operator==(
 	const CDeviceCommandContext::Rect& lhs,
 	const CDeviceCommandContext::Rect& rhs)
 {
 	const bool sameOrigin = lhs.x == rhs.x && lhs.y == rhs.y;
 	const bool sameExtent = lhs.width == rhs.width && lhs.height == rhs.height;
 	return sameOrigin && sameExtent;
 }
 
 // Negation of the equality operator for rects.
 bool operator!=(
 	const CDeviceCommandContext::Rect& lhs,
 	const CDeviceCommandContext::Rect& rhs)
 {
 	const bool same = operator==(lhs, rhs);
 	return !same;
 }
 
 // Enables or disables depth writes through glDepthMask.
 void ApplyDepthMask(const bool depthWriteEnabled)
 {
 	if (depthWriteEnabled)
 		glDepthMask(GL_TRUE);
 	else
 		glDepthMask(GL_FALSE);
 }
 
 // Translates a ColorWriteMask bit set into the four per-channel flags of glColorMask.
 void ApplyColorMask(const uint8_t colorWriteMask)
 {
 	const bool writeRed = (colorWriteMask & ColorWriteMask::RED) != 0;
 	const bool writeGreen = (colorWriteMask & ColorWriteMask::GREEN) != 0;
 	const bool writeBlue = (colorWriteMask & ColorWriteMask::BLUE) != 0;
 	const bool writeAlpha = (colorWriteMask & ColorWriteMask::ALPHA) != 0;
 
 	glColorMask(
 		writeRed ? GL_TRUE : GL_FALSE,
 		writeGreen ? GL_TRUE : GL_FALSE,
 		writeBlue ? GL_TRUE : GL_FALSE,
 		writeAlpha ? GL_TRUE : GL_FALSE);
 }
 
 // Passes the stencil write mask straight to glStencilMask.
 void ApplyStencilMask(const uint32_t stencilWriteMask)
 {
 	glStencilMask(stencilWriteMask);
 }
 
 // Maps a buffer type onto the GL binding target it is bound to.
 GLenum BufferTypeToGLTarget(const CBuffer::Type type)
 {
 	switch (type)
 	{
 	case CBuffer::Type::INDEX:
 		return GL_ELEMENT_ARRAY_BUFFER;
 	case CBuffer::Type::VERTEX:
 		return GL_ARRAY_BUFFER;
 	}
 	// Any other value falls back to the vertex target.
 	return GL_ARRAY_BUFFER;
 }
 
 #if !CONFIG2_GLES
 // True for the depth and depth-stencil formats D16, D24, D32 and D24_S8.
 bool IsDepthTexture(const Format format)
 {
 	switch (format)
 	{
 	case Format::D16:
 	case Format::D24:
 	case Format::D32:
 	case Format::D24_S8:
 		return true;
 	default:
 		return false;
 	}
 }
 #endif // !CONFIG2_GLES
 
 // Uploads a region of the dynamic buffer currently bound to `target`.
 //
 // The buffer storage is first re-specified with glBufferData(nullptr) so the
 // driver may hand out fresh memory, then mapped write-only, and uploadFunction
 // is called with a pointer to byte `dataOffset` of the mapping.
 //
 // @param target GL binding target the buffer is bound to.
 // @param bufferSize total size of the buffer in bytes.
 // @param dataOffset offset in bytes of the region inside the buffer.
 // @param dataSize size in bytes of the region; it must fit inside the buffer.
 // @param uploadFunction callback that writes the region's data.
 //
 // If mapping fails an error is logged and nothing is uploaded. If unmapping
 // fails the map/upload/unmap sequence is repeated.
 void UploadDynamicBufferRegionImpl(
 	const GLenum target, const uint32_t bufferSize,
 	const uint32_t dataOffset, const uint32_t dataSize,
 	const CDeviceCommandContext::UploadBufferFunction& uploadFunction)
 {
 	// The region [dataOffset, dataOffset + dataSize) has to lie inside the
 	// buffer. Written as a subtraction so the sum cannot overflow uint32_t.
 	ENSURE(dataSize <= bufferSize && dataOffset <= bufferSize - dataSize);
 	// Tell the driver that it can reallocate the whole VBO
 	glBufferDataARB(target, bufferSize, nullptr, GL_DYNAMIC_DRAW);
 	ogl_WarnIfError();
 
 	while (true)
 	{
 		// (In theory, glMapBufferRange with GL_MAP_INVALIDATE_BUFFER_BIT could be used
 		// here instead of glBufferData(..., NULL, ...) plus glMapBuffer(), but with
 		// current Intel Windows GPU drivers (as of 2015-01) it's much faster if you do
 		// the explicit glBufferData.)
 		void* mappedData = glMapBufferARB(target, GL_WRITE_ONLY);
 		if (mappedData == nullptr)
 		{
 			// This shouldn't happen unless we run out of virtual address space
 			LOGERROR("glMapBuffer failed");
 			break;
 		}
 
 		uploadFunction(static_cast<u8*>(mappedData) + dataOffset);
 
 		if (glUnmapBufferARB(target) == GL_TRUE)
 			break;
 
 		// Unmap might fail on e.g. resolution switches, so just try again
 		// and hope it will eventually succeed
 		LOGMESSAGE("glUnmapBuffer failed, trying again...\n");
 	}
 }
 
 } // anonymous namespace
 
 // static
 std::unique_ptr<CDeviceCommandContext> CDeviceCommandContext::Create(CDevice* device)
 {
 	std::unique_ptr<CDeviceCommandContext> context{new CDeviceCommandContext(device)};
 
 	// A new context targets the device's current backbuffer and starts from reset states.
 	CFramebuffer* const backbuffer = static_cast<CFramebuffer*>(device->GetCurrentBackbuffer());
 	context->m_Framebuffer = backbuffer;
 	context->ResetStates();
 
 	return context;
 }
 
 // Puts the GL texture binding and the context's cached binding tables into a
 // known initial state.
 CDeviceCommandContext::CDeviceCommandContext(CDevice* device)
 	: m_Device(device)
 {
 	// Unbind unit 0, then record every unit as having no 2D texture bound.
 	glActiveTexture(GL_TEXTURE0);
 	glBindTexture(GL_TEXTURE_2D, 0);
 	for (BindUnit& bindUnit : m_BoundTextures)
 	{
 		bindUnit.target = GL_TEXTURE_2D;
 		bindUnit.handle = 0;
 	}
 
 	// No vertex attribute is active or initialized yet.
 	for (auto& attributeFormat : m_VertexAttributeFormat)
 	{
 		attributeFormat.active = false;
 		attributeFormat.initialized = false;
 		attributeFormat.bindingSlot = 0;
 	}
 
 	// One slot per buffer type: remember its GL target, with no buffer bound.
 	for (size_t typeIndex = 0; typeIndex < m_BoundBuffers.size(); ++typeIndex)
 	{
 		const GLuint noBuffer = 0;
 		auto& slot = m_BoundBuffers[typeIndex];
 		slot.first = BufferTypeToGLTarget(static_cast<CBuffer::Type>(typeIndex));
 		slot.second = noBuffer;
 	}
 }
 
 // Defaulted destructor, defined out of line in this translation unit.
 CDeviceCommandContext::~CDeviceCommandContext() = default;
 
 // Returns the device this context was constructed with.
 IDevice* CDeviceCommandContext::GetDevice()
 {
 	return m_Device;
 }
 
 // Forwards to SetGraphicsPipelineStateImpl with its second argument set to false.
 void CDeviceCommandContext::SetGraphicsPipelineState(
 	const GraphicsPipelineStateDesc& pipelineStateDesc)
 {
 	SetGraphicsPipelineStateImpl(pipelineStateDesc, false);
 }
 
 // Uploads a whole mip level by delegating to UploadTextureRegion with a region
 // that starts at (0, 0) and covers the full level.
 void CDeviceCommandContext::UploadTexture(
 	ITexture* texture, const Format format,
 	const void* data, const size_t dataSize,
 	const uint32_t level, const uint32_t layer)
 {
 	// Each dimension is shifted down by the level but never drops below one texel.
 	const uint32_t levelWidth = std::max(1u, texture->GetWidth() >> level);
 	const uint32_t levelHeight = std::max(1u, texture->GetHeight() >> level);
 
 	UploadTextureRegion(texture, format, data, dataSize,
 		0, 0, levelWidth, levelHeight, level, layer);
 }
 
 // Uploads a rectangle of texel data into one mip level (and, for cube maps,
 // one face) of the destination texture.
 //
 // The texture must have been created with TRANSFER_DST usage, and dataFormat
 // must equal the texture's own format. Supported combinations:
 //  - 2D, uncompressed 8-bit formats: arbitrary sub-rectangle via
 //    glTexSubImage2D; dataSize must equal width * height * bytes-per-pixel.
 //  - 2D, BC1/BC2/BC3: the offset must be zero and the level image is
 //    (re)specified with glCompressedTexImage2DARB.
 //  - Cube, R8G8B8A8_UNORM: level 0 only, full face only, layer selects the
 //    face (0..5).
 // Anything else triggers a debug warning and uploads nothing.
 void CDeviceCommandContext::UploadTextureRegion(
 	ITexture* destinationTexture, const Format dataFormat,
 	const void* data, const size_t dataSize,
 	const uint32_t xOffset, const uint32_t yOffset,
 	const uint32_t width, const uint32_t height,
 	const uint32_t level, const uint32_t layer)
 {
 	ENSURE(destinationTexture);
 	CTexture* texture = destinationTexture->As<CTexture>();
 	ENSURE(texture->GetUsage() & Renderer::Backend::ITexture::Usage::TRANSFER_DST);
 	ENSURE(width > 0 && height > 0);
 	if (texture->GetType() == CTexture::Type::TEXTURE_2D)
 	{
 		// 2D textures have a single layer.
 		ENSURE(layer == 0);
 		if (texture->GetFormat() == Format::R8G8B8A8_UNORM ||
 			texture->GetFormat() == Format::R8G8B8_UNORM ||
 #if !CONFIG2_GLES
 			texture->GetFormat() == Format::R8_UNORM ||
 #endif
 			texture->GetFormat() == Format::A8_UNORM)
 		{
 			ENSURE(texture->GetFormat() == dataFormat);
 			// Defaults describe R8G8B8A8_UNORM; the switch adjusts them for
 			// the narrower formats.
 			size_t bytesPerPixel = 4;
 			GLenum pixelFormat = GL_RGBA;
 			switch (dataFormat)
 			{
 			case Format::R8G8B8A8_UNORM:
 				break;
 			case Format::R8G8B8_UNORM:
 				pixelFormat = GL_RGB;
 				bytesPerPixel = 3;
 				break;
 #if !CONFIG2_GLES
 			case Format::R8_UNORM:
 				pixelFormat = GL_RED;
 				bytesPerPixel = 1;
 				break;
 #endif
 			case Format::A8_UNORM:
 				pixelFormat = GL_ALPHA;
 				bytesPerPixel = 1;
 				break;
 			// NOTE(review): L8_UNORM is not among the formats admitted by the
 			// enclosing condition, so this case looks unreachable here - confirm.
 			case Format::L8_UNORM:
 				pixelFormat = GL_LUMINANCE;
 				bytesPerPixel = 1;
 				break;
 			default:
 				debug_warn("Unexpected format.");
 				break;
 			}
 			// The caller must provide exactly one tightly sized block of texels.
 			ENSURE(dataSize == width * height * bytesPerPixel);
 
 			ScopedBind scopedBind(this, GL_TEXTURE_2D, texture->GetHandle());
 			glTexSubImage2D(GL_TEXTURE_2D, level,
 				xOffset, yOffset, width, height,
 				pixelFormat, GL_UNSIGNED_BYTE, data);
 			ogl_WarnIfError();
 		}
 		else if (
 			texture->GetFormat() == Format::BC1_RGB_UNORM ||
 			texture->GetFormat() == Format::BC1_RGBA_UNORM ||
 			texture->GetFormat() == Format::BC2_UNORM ||
 			texture->GetFormat() == Format::BC3_UNORM)
 		{
 			// Compressed uploads always replace the whole level image.
 			ENSURE(xOffset == 0 && yOffset == 0);
 			ENSURE(texture->GetFormat() == dataFormat);
 			// TODO: add data size check.
 
 			// BC1_RGB_UNORM is the default; the other formats override it.
 			GLenum internalFormat = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
 			switch (texture->GetFormat())
 			{
 			case Format::BC1_RGBA_UNORM:
 				internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
 				break;
 			case Format::BC2_UNORM:
 				internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
 				break;
 			case Format::BC3_UNORM:
 				internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
 				break;
 			default:
 				break;
 			}
 
 			ScopedBind scopedBind(this, GL_TEXTURE_2D, texture->GetHandle());
 			glCompressedTexImage2DARB(GL_TEXTURE_2D, level, internalFormat, width, height, 0, dataSize, data);
 			ogl_WarnIfError();
 		}
 		else
 			debug_warn("Unsupported format");
 	}
 	else if (texture->GetType() == CTexture::Type::TEXTURE_CUBE)
 	{
 		if (texture->GetFormat() == Format::R8G8B8A8_UNORM)
 		{
 			ENSURE(texture->GetFormat() == dataFormat);
 			// Only the base level of a full face can be uploaded.
 			ENSURE(level == 0 && layer < 6);
 			ENSURE(xOffset == 0 && yOffset == 0 && texture->GetWidth() == width && texture->GetHeight() == height);
 			const size_t bpp = 4;
 			ENSURE(dataSize == width * height * bpp);
 
 			// The order of layers should be the following:
 			//   front, back, top, bottom, right, left
 			static const GLenum targets[6] =
 			{
 				GL_TEXTURE_CUBE_MAP_POSITIVE_X,
 				GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
 				GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
 				GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
 				GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
 				GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
 			};
 
 			ScopedBind scopedBind(this, GL_TEXTURE_CUBE_MAP, texture->GetHandle());
 			glTexImage2D(targets[layer], level, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
 			ogl_WarnIfError();
 		}
 		else
 			debug_warn("Unsupported format");
 	}
 	else
 		debug_warn("Unsupported type");
 }
 
 // Uploads dataSize bytes from data to the beginning of the buffer.
 // Must not be called inside a framebuffer pass.
 void CDeviceCommandContext::UploadBuffer(IBuffer* buffer, const void* data, const uint32_t dataSize)
 {
+	ENSURE(!m_InsideFramebufferPass);
 	// UploadBufferRegion takes (dataOffset, dataSize) in that order. A whole
 	// upload starts at offset zero and covers dataSize bytes; passing the
 	// arguments the other way round would upload nothing at offset dataSize.
 	UploadBufferRegion(buffer, data, 0, dataSize);
 }
 
 // Uploads the whole buffer (offset zero, buffer->GetSize() bytes) by letting
 // uploadFunction fill the data. Must not be called inside a framebuffer pass.
 void CDeviceCommandContext::UploadBuffer(
 	IBuffer* buffer, const UploadBufferFunction& uploadFunction)
 {
+	ENSURE(!m_InsideFramebufferPass);
 	UploadBufferRegion(buffer, 0, buffer->GetSize(), uploadFunction);
 }
 
 // Copies dataSize bytes from data into the buffer starting at dataOffset.
 // The region must lie inside the buffer and the call must happen outside of
 // a framebuffer pass. The buffer is bound only for the duration of the call.
 void CDeviceCommandContext::UploadBufferRegion(
 	IBuffer* buffer, const void* data, const uint32_t dataOffset, const uint32_t dataSize)
 {
+	ENSURE(!m_InsideFramebufferPass);
 	ENSURE(data);
 	ENSURE(dataOffset + dataSize <= buffer->GetSize());
 	const GLenum bufferTarget = BufferTypeToGLTarget(buffer->GetType());
 	ScopedBufferBind scopedBufferBind(this, buffer->As<CBuffer>());
 
 	// Non-dynamic buffers are updated directly with a sub-data call.
 	if (!buffer->IsDynamic())
 	{
 		glBufferSubDataARB(bufferTarget, dataOffset, dataSize, data);
 		ogl_WarnIfError();
 		return;
 	}
 
 	// Dynamic buffers go through the shared dynamic upload path, which hands
 	// us the destination pointer to copy into.
 	const auto copyIntoMapped = [data, dataSize](u8* mappedData)
 	{
 		std::memcpy(mappedData, data, dataSize);
 	};
 	UploadDynamicBufferRegionImpl(bufferTarget, buffer->GetSize(), dataOffset, dataSize, copyIntoMapped);
 }
 
 // Updates dataSize bytes of a dynamic buffer starting at dataOffset, with the
 // bytes produced by uploadFunction. The region must lie inside the buffer,
 // the buffer must be dynamic and the call must happen outside of a
 // framebuffer pass. The buffer is bound only for the duration of the call.
 void CDeviceCommandContext::UploadBufferRegion(
 	IBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
 	const UploadBufferFunction& uploadFunction)
 {
+	ENSURE(!m_InsideFramebufferPass);
 	ENSURE(dataOffset + dataSize <= buffer->GetSize());
 	const GLenum target = BufferTypeToGLTarget(buffer->GetType());
 	ScopedBufferBind scopedBufferBind(this, buffer->As<CBuffer>());
 	// Unlike the raw-data overload there is no non-dynamic fallback here.
 	ENSURE(buffer->IsDynamic());
 	UploadDynamicBufferRegionImpl(target, buffer->GetSize(), dataOffset, dataSize, uploadFunction);
 }
 
 // Opens a named debug group if the device supports scoped debug labels;
 // otherwise does nothing. Each opened label increments the nesting depth.
 void CDeviceCommandContext::BeginScopedLabel(const char* name)
 {
 	if (m_Device->GetCapabilities().debugScopedLabels)
 	{
 		++m_ScopedLabelDepth;
 		// A length of -1 tells GL that name is null-terminated.
 		glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0x0AD, -1, name);
 	}
 }
 
 // Closes the innermost debug group opened by BeginScopedLabel if the device
 // supports scoped debug labels; otherwise does nothing.
 void CDeviceCommandContext::EndScopedLabel()
 {
 	if (m_Device->GetCapabilities().debugScopedLabels)
 	{
 		// There must be an open label to close.
 		ENSURE(m_ScopedLabelDepth > 0);
 		--m_ScopedLabelDepth;
 		glPopDebugGroup();
 	}
 }
 
 void CDeviceCommandContext::BindTexture(
 	const uint32_t unit, const GLenum target, const GLuint handle)
 {
 	ENSURE(unit < m_BoundTextures.size());
 #if CONFIG2_GLES
 	ENSURE(target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP);
 #else
 	ENSURE(target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_2D_MULTISAMPLE);
 #endif
 	if (m_ActiveTextureUnit != unit)
 	{
 		glActiveTexture(GL_TEXTURE0 + unit);
 		m_ActiveTextureUnit = unit;
 	}
 	if (m_BoundTextures[unit].target == target && m_BoundTextures[unit].handle == handle)
 		return;
 	if (m_BoundTextures[unit].target != target && m_BoundTextures[unit].target && m_BoundTextures[unit].handle)
 		glBindTexture(m_BoundTextures[unit].target, 0);
 	if (m_BoundTextures[unit].handle != handle)
 		glBindTexture(target, handle);
 	ogl_WarnIfError();
 	m_BoundTextures[unit] = {target, handle};
 }
 
 // Binds buffer (or unbinds, when buffer is null) on the GL target matching
 // type and records the bound handle in the buffer binding cache.
 void CDeviceCommandContext::BindBuffer(const IBuffer::Type type, CBuffer* buffer)
 {
 	ENSURE(!buffer || buffer->GetType() == type);
 	if (type == IBuffer::Type::VERTEX)
 	{
 		// Rebinding the current vertex buffer is a no-op.
 		if (m_VertexBuffer == buffer)
 			return;
 		m_VertexBuffer = buffer;
 	}
 	else if (type == IBuffer::Type::INDEX)
 	{
 		// Index buffers have no early-out: the bind below is always issued.
 		// Any client-side index data is dropped, and unbinding also clears
 		// the current index buffer.
 		if (!buffer)
 			m_IndexBuffer = nullptr;
 		m_IndexBufferData = nullptr;
 	}
 	const GLenum target = BufferTypeToGLTarget(type);
 	const GLuint handle = buffer ? buffer->GetHandle() : 0;
 	glBindBufferARB(target, handle);
 	ogl_WarnIfError();
 	// The cache is indexed by the numeric value of the buffer type.
 	const size_t cacheIndex = static_cast<size_t>(type);
 	ENSURE(cacheIndex < m_BoundBuffers.size());
 	m_BoundBuffers[cacheIndex].second = handle;
 }
 
 // Resets every texture unit whose cached binding refers to the given
 // texture's handle back to "GL_TEXTURE_2D, nothing bound".
 void CDeviceCommandContext::OnTextureDestroy(CTexture* texture)
 {
 	ENSURE(texture);
 	const auto destroyedHandle = texture->GetHandle();
 	for (size_t unit = 0; unit < m_BoundTextures.size(); ++unit)
 	{
 		if (m_BoundTextures[unit].handle != destroyedHandle)
 			continue;
 		BindTexture(unit, GL_TEXTURE_2D, 0);
 	}
 }
 
 // Returns the context to its default state: default pipeline state and
 // backbuffer (via ResetStates), no index data, no bound textures and no
 // bound index/vertex buffers. All scoped labels must be closed beforehand.
 void CDeviceCommandContext::Flush()
 {
 	ENSURE(m_ScopedLabelDepth == 0);
 
 	GPU_SCOPED_LABEL(this, "CDeviceCommandContext::Flush");
 
 	ResetStates();
 
 	m_IndexBuffer = nullptr;
 	m_IndexBufferData = nullptr;
 
 	// Only units that currently have a texture bound need to be touched.
 	for (size_t unit = 0; unit < m_BoundTextures.size(); ++unit)
 	{
 		if (m_BoundTextures[unit].handle)
 			BindTexture(unit, GL_TEXTURE_2D, 0);
 	}
 	BindBuffer(CBuffer::Type::INDEX, nullptr);
 	BindBuffer(CBuffer::Type::VERTEX, nullptr);
 }
 
 // Forces the default graphics pipeline state onto GL, disables scissors and
 // makes the device's current backbuffer the bound framebuffer.
 void CDeviceCommandContext::ResetStates()
 {
 	// force = true: every state is re-applied regardless of the cached values.
 	SetGraphicsPipelineStateImpl(MakeDefaultGraphicsPipelineStateDesc(), true);
 	SetScissors(0, nullptr);
 	m_Framebuffer = static_cast<CFramebuffer*>(m_Device->GetCurrentBackbuffer());
 	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_Framebuffer->GetHandle());
 	ogl_WarnIfError();
 }
 
 // Transitions GL from the cached pipeline state (m_GraphicsPipelineStateDesc)
 // to pipelineStateDesc and stores the new description in the cache.
 //
 // For each state group a GL call is issued only when the cached and requested
 // values differ, or unconditionally when force is true. The shader program
 // switch is the exception: it depends only on the program pointers differing.
 // Must not be called between BeginPass and EndPass.
 void CDeviceCommandContext::SetGraphicsPipelineStateImpl(
 	const GraphicsPipelineStateDesc& pipelineStateDesc, const bool force)
 {
 	ENSURE(!m_InsidePass);
 
 	// --- Shader program ---
 	if (m_GraphicsPipelineStateDesc.shaderProgram != pipelineStateDesc.shaderProgram)
 	{
 		CShaderProgram* currentShaderProgram = nullptr;
 		if (m_GraphicsPipelineStateDesc.shaderProgram)
 		{
 			currentShaderProgram =
 				static_cast<CShaderProgram*>(m_GraphicsPipelineStateDesc.shaderProgram);
 		}
 		CShaderProgram* nextShaderProgram = nullptr;
 		if (pipelineStateDesc.shaderProgram)
 		{
 			nextShaderProgram =
 				static_cast<CShaderProgram*>(pipelineStateDesc.shaderProgram);
 			// The set of active vertex streams depends on the program, so the
 			// attribute formats have to be specified again for the new one.
 			for (size_t index = 0; index < m_VertexAttributeFormat.size(); ++index)
 			{
 				const VertexAttributeStream stream = static_cast<VertexAttributeStream>(index);
 				m_VertexAttributeFormat[index].active = nextShaderProgram->IsStreamActive(stream);
 				m_VertexAttributeFormat[index].initialized = false;
 				m_VertexAttributeFormat[index].bindingSlot = std::numeric_limits<uint32_t>::max();
 			}
 		}
 		// Bind receives the previous program (possibly null); when there is no
 		// next program the previous one is simply unbound.
 		if (nextShaderProgram)
 			nextShaderProgram->Bind(currentShaderProgram);
 		else if (currentShaderProgram)
 			currentShaderProgram->Unbind();
 
 		m_ShaderProgram = nextShaderProgram;
 	}
 
 	// --- Depth state ---
 	const DepthStencilStateDesc& currentDepthStencilStateDesc = m_GraphicsPipelineStateDesc.depthStencilState;
 	const DepthStencilStateDesc& nextDepthStencilStateDesc = pipelineStateDesc.depthStencilState;
 	if (force || currentDepthStencilStateDesc.depthTestEnabled != nextDepthStencilStateDesc.depthTestEnabled)
 	{
 		if (nextDepthStencilStateDesc.depthTestEnabled)
 			glEnable(GL_DEPTH_TEST);
 		else
 			glDisable(GL_DEPTH_TEST);
 	}
 	if (force || currentDepthStencilStateDesc.depthCompareOp != nextDepthStencilStateDesc.depthCompareOp)
 	{
 		glDepthFunc(Mapping::FromCompareOp(nextDepthStencilStateDesc.depthCompareOp));
 	}
 	if (force || currentDepthStencilStateDesc.depthWriteEnabled != nextDepthStencilStateDesc.depthWriteEnabled)
 	{
 		ApplyDepthMask(nextDepthStencilStateDesc.depthWriteEnabled);
 	}
 
 	// --- Stencil state ---
 	if (force || currentDepthStencilStateDesc.stencilTestEnabled != nextDepthStencilStateDesc.stencilTestEnabled)
 	{
 		if (nextDepthStencilStateDesc.stencilTestEnabled)
 			glEnable(GL_STENCIL_TEST);
 		else
 			glDisable(GL_STENCIL_TEST);
 	}
 	if (force ||
 		currentDepthStencilStateDesc.stencilFrontFace != nextDepthStencilStateDesc.stencilFrontFace ||
 		currentDepthStencilStateDesc.stencilBackFace != nextDepthStencilStateDesc.stencilBackFace)
 	{
 		// Identical front and back descriptions can be set with one call;
 		// otherwise each changed face is set separately.
 		if (nextDepthStencilStateDesc.stencilFrontFace == nextDepthStencilStateDesc.stencilBackFace)
 		{
 			glStencilOp(
 				Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.failOp),
 				Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.depthFailOp),
 				Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.passOp));
 		}
 		else
 		{
 			if (force || currentDepthStencilStateDesc.stencilFrontFace != nextDepthStencilStateDesc.stencilFrontFace)
 			{
 				glStencilOpSeparate(
 					GL_FRONT,
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.failOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.depthFailOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.passOp));
 			}
 			if (force || currentDepthStencilStateDesc.stencilBackFace != nextDepthStencilStateDesc.stencilBackFace)
 			{
 				glStencilOpSeparate(
 					GL_BACK,
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.failOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.depthFailOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.passOp));
 			}
 		}
 	}
 	if (force || currentDepthStencilStateDesc.stencilWriteMask != nextDepthStencilStateDesc.stencilWriteMask)
 	{
 		ApplyStencilMask(nextDepthStencilStateDesc.stencilWriteMask);
 	}
 	// The stencil function combines compare op, reference and read mask, so a
 	// change to any of them requires setting it again.
 	if (force ||
 		currentDepthStencilStateDesc.stencilReference != nextDepthStencilStateDesc.stencilReference ||
 		currentDepthStencilStateDesc.stencilReadMask != nextDepthStencilStateDesc.stencilReadMask ||
 		currentDepthStencilStateDesc.stencilFrontFace.compareOp != nextDepthStencilStateDesc.stencilFrontFace.compareOp ||
 		currentDepthStencilStateDesc.stencilBackFace.compareOp != nextDepthStencilStateDesc.stencilBackFace.compareOp)
 	{
 		if (nextDepthStencilStateDesc.stencilFrontFace.compareOp == nextDepthStencilStateDesc.stencilBackFace.compareOp)
 		{
 			glStencilFunc(
 				Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilFrontFace.compareOp),
 				nextDepthStencilStateDesc.stencilReference,
 				nextDepthStencilStateDesc.stencilReadMask);
 		}
 		else
 		{
 			glStencilFuncSeparate(GL_FRONT,
 				Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilFrontFace.compareOp),
 				nextDepthStencilStateDesc.stencilReference,
 				nextDepthStencilStateDesc.stencilReadMask);
 			glStencilFuncSeparate(GL_BACK,
 				Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilBackFace.compareOp),
 				nextDepthStencilStateDesc.stencilReference,
 				nextDepthStencilStateDesc.stencilReadMask);
 		}
 	}
 
 	// --- Blend state ---
 	const BlendStateDesc& currentBlendStateDesc = m_GraphicsPipelineStateDesc.blendState;
 	const BlendStateDesc& nextBlendStateDesc = pipelineStateDesc.blendState;
 	if (force || currentBlendStateDesc.enabled != nextBlendStateDesc.enabled)
 	{
 		if (nextBlendStateDesc.enabled)
 			glEnable(GL_BLEND);
 		else
 			glDisable(GL_BLEND);
 	}
 	if (force ||
 		currentBlendStateDesc.srcColorBlendFactor != nextBlendStateDesc.srcColorBlendFactor ||
 		currentBlendStateDesc.srcAlphaBlendFactor != nextBlendStateDesc.srcAlphaBlendFactor ||
 		currentBlendStateDesc.dstColorBlendFactor != nextBlendStateDesc.dstColorBlendFactor ||
 		currentBlendStateDesc.dstAlphaBlendFactor != nextBlendStateDesc.dstAlphaBlendFactor)
 	{
 		// Use the combined call when color and alpha share their factors.
 		if (nextBlendStateDesc.srcColorBlendFactor == nextBlendStateDesc.srcAlphaBlendFactor &&
 			nextBlendStateDesc.dstColorBlendFactor == nextBlendStateDesc.dstAlphaBlendFactor)
 		{
 			glBlendFunc(
 				Mapping::FromBlendFactor(nextBlendStateDesc.srcColorBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.dstColorBlendFactor));
 		}
 		else
 		{
 			glBlendFuncSeparate(
 				Mapping::FromBlendFactor(nextBlendStateDesc.srcColorBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.dstColorBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.srcAlphaBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.dstAlphaBlendFactor));
 		}
 	}
 
 	if (force ||
 		currentBlendStateDesc.colorBlendOp != nextBlendStateDesc.colorBlendOp ||
 		currentBlendStateDesc.alphaBlendOp != nextBlendStateDesc.alphaBlendOp)
 	{
 		if (nextBlendStateDesc.colorBlendOp == nextBlendStateDesc.alphaBlendOp)
 		{
 			glBlendEquation(Mapping::FromBlendOp(nextBlendStateDesc.colorBlendOp));
 		}
 		else
 		{
 			glBlendEquationSeparate(
 				Mapping::FromBlendOp(nextBlendStateDesc.colorBlendOp),
 				Mapping::FromBlendOp(nextBlendStateDesc.alphaBlendOp));
 		}
 	}
 
 	if (force ||
 		currentBlendStateDesc.constant != nextBlendStateDesc.constant)
 	{
 		glBlendColor(
 			nextBlendStateDesc.constant.r,
 			nextBlendStateDesc.constant.g,
 			nextBlendStateDesc.constant.b,
 			nextBlendStateDesc.constant.a);
 	}
 
 	if (force ||
 		currentBlendStateDesc.colorWriteMask != nextBlendStateDesc.colorWriteMask)
 	{
 		ApplyColorMask(nextBlendStateDesc.colorWriteMask);
 	}
 
 	// --- Rasterization state ---
 	const RasterizationStateDesc& currentRasterizationStateDesc =
 		m_GraphicsPipelineStateDesc.rasterizationState;
 	const RasterizationStateDesc& nextRasterizationStateDesc =
 		pipelineStateDesc.rasterizationState;
 	if (force ||
 		currentRasterizationStateDesc.polygonMode != nextRasterizationStateDesc.polygonMode)
 	{
 		// The polygon mode is only applied in non-GLES builds.
 #if !CONFIG2_GLES
 		glPolygonMode(
 			GL_FRONT_AND_BACK,
 			nextRasterizationStateDesc.polygonMode == PolygonMode::LINE ? GL_LINE : GL_FILL);
 #endif
 	}
 
 	if (force ||
 		currentRasterizationStateDesc.cullMode != nextRasterizationStateDesc.cullMode)
 	{
 		if (nextRasterizationStateDesc.cullMode == CullMode::NONE)
 		{
 			glDisable(GL_CULL_FACE);
 		}
 		else
 		{
 			// Culling only needs enabling when it was off before (or on force).
 			if (force || currentRasterizationStateDesc.cullMode == CullMode::NONE)
 				glEnable(GL_CULL_FACE);
 			glCullFace(nextRasterizationStateDesc.cullMode == CullMode::FRONT ? GL_FRONT : GL_BACK);
 		}
 	}
 
 	if (force ||
 		currentRasterizationStateDesc.frontFace != nextRasterizationStateDesc.frontFace)
 	{
 		if (nextRasterizationStateDesc.frontFace == FrontFace::CLOCKWISE)
 			glFrontFace(GL_CW);
 		else
 			glFrontFace(GL_CCW);
 	}
 
 	// Depth bias is only applied in non-GLES builds.
 #if !CONFIG2_GLES
 	if (force ||
 		currentRasterizationStateDesc.depthBiasEnabled != nextRasterizationStateDesc.depthBiasEnabled)
 	{
 		if (nextRasterizationStateDesc.depthBiasEnabled)
 			glEnable(GL_POLYGON_OFFSET_FILL);
 		else
 			glDisable(GL_POLYGON_OFFSET_FILL);
 	}
 	if (force ||
 		currentRasterizationStateDesc.depthBiasConstantFactor != nextRasterizationStateDesc.depthBiasConstantFactor ||
 		currentRasterizationStateDesc.depthBiasSlopeFactor != nextRasterizationStateDesc.depthBiasSlopeFactor)
 	{
 		// glPolygonOffset takes the slope-scaled factor first, then the
 		// constant units.
 		glPolygonOffset(
 			nextRasterizationStateDesc.depthBiasSlopeFactor,
 			nextRasterizationStateDesc.depthBiasConstantFactor);
 	}
 #endif
 
 	ogl_WarnIfError();
 
 	// Remember what was applied so that the next call can diff against it.
 	m_GraphicsPipelineStateDesc = pipelineStateDesc;
 }
 
 // Copies the contents of srcFramebuffer into dstFramebuffer without scaling,
 // for the attachments that both framebuffers have. Must be called outside of
 // a framebuffer pass. Not implemented for GLES, where it only emits a debug
 // warning.
 void CDeviceCommandContext::BlitFramebuffer(
 	IFramebuffer* dstFramebuffer, IFramebuffer* srcFramebuffer)
 {
 	ENSURE(!m_InsideFramebufferPass);
 	CFramebuffer* destinationFramebuffer = dstFramebuffer->As<CFramebuffer>();
 	CFramebuffer* sourceFramebuffer = srcFramebuffer->As<CFramebuffer>();
 #if CONFIG2_GLES
 	UNUSED2(destinationFramebuffer);
 	UNUSED2(sourceFramebuffer);
 	debug_warn("CDeviceCommandContext::BlitFramebuffer is not implemented for GLES");
 #else
 	// Source framebuffer should not be backbuffer.
 	ENSURE(sourceFramebuffer->GetHandle() != 0);
 	ENSURE(destinationFramebuffer != sourceFramebuffer);
 	glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, sourceFramebuffer->GetHandle());
 	glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, destinationFramebuffer->GetHandle());
 	// TODO: add more check for internal formats. And currently we don't support
 	// scaling inside blit.
 	// Both rectangles use the source dimensions, and only attachments present
 	// in both framebuffers are copied.
 	glBlitFramebufferEXT(
 		0, 0, sourceFramebuffer->GetWidth(), sourceFramebuffer->GetHeight(),
 		0, 0, sourceFramebuffer->GetWidth(), sourceFramebuffer->GetHeight(),
 		(sourceFramebuffer->GetAttachmentMask() & destinationFramebuffer->GetAttachmentMask()),
 		GL_NEAREST);
 	ogl_WarnIfError();
 #endif
 }
 
 // Clears color, depth and stencil of the current framebuffer.
 void CDeviceCommandContext::ClearFramebuffer()
 {
 	ClearFramebuffer(true, true, true);
 }
 
 void CDeviceCommandContext::ClearFramebuffer(const bool color, const bool depth, const bool stencil)
 {
 	const bool needsColor = color && (m_Framebuffer->GetAttachmentMask() & GL_COLOR_BUFFER_BIT) != 0;
 	const bool needsDepth = depth && (m_Framebuffer->GetAttachmentMask() & GL_DEPTH_BUFFER_BIT) != 0;
 	const bool needsStencil = stencil && (m_Framebuffer->GetAttachmentMask() & GL_STENCIL_BUFFER_BIT) != 0;
 	GLbitfield mask = 0;
 	if (needsColor)
 	{
 		ApplyColorMask(ColorWriteMask::RED | ColorWriteMask::GREEN | ColorWriteMask::BLUE | ColorWriteMask::ALPHA);
 		glClearColor(
 			m_Framebuffer->GetClearColor().r,
 			m_Framebuffer->GetClearColor().g,
 			m_Framebuffer->GetClearColor().b,
 			m_Framebuffer->GetClearColor().a);
 		mask |= GL_COLOR_BUFFER_BIT;
 	}
 	if (needsDepth)
 	{
 		ApplyDepthMask(true);
 		mask |= GL_DEPTH_BUFFER_BIT;
 	}
 	if (needsStencil)
 	{
 		ApplyStencilMask(std::numeric_limits<uint32_t>::max());
 		mask |= GL_STENCIL_BUFFER_BIT;
 	}
 	glClear(mask);
 	ogl_WarnIfError();
 	if (needsColor)
 		ApplyColorMask(m_GraphicsPipelineStateDesc.blendState.colorWriteMask);
 	if (needsDepth)
 		ApplyDepthMask(m_GraphicsPipelineStateDesc.depthStencilState.depthWriteEnabled);
 	if (needsStencil)
 		ApplyStencilMask(m_GraphicsPipelineStateDesc.depthStencilState.stencilWriteMask);
 }
 
 // Starts a framebuffer pass: makes the given framebuffer current and binds
 // it. Passes cannot be nested. A framebuffer with a non-zero handle must have
 // non-zero dimensions.
 void CDeviceCommandContext::BeginFramebufferPass(IFramebuffer* framebuffer)
 {
 	ENSURE(!m_InsideFramebufferPass);
 	m_InsideFramebufferPass = true;
 	ENSURE(framebuffer);
 	m_Framebuffer = framebuffer->As<CFramebuffer>();
 	ENSURE(m_Framebuffer->GetHandle() == 0 || (m_Framebuffer->GetWidth() > 0 && m_Framebuffer->GetHeight() > 0));
 	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_Framebuffer->GetHandle());
 	ogl_WarnIfError();
 }
 
 // Ends the framebuffer pass started by BeginFramebufferPass and rebinds the
 // device's current backbuffer.
 void CDeviceCommandContext::EndFramebufferPass()
 {
 	ENSURE(m_InsideFramebufferPass);
 	m_InsideFramebufferPass = false;
 	m_Framebuffer = static_cast<CFramebuffer*>(m_Device->GetCurrentBackbuffer());
 	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_Framebuffer->GetHandle());
 	ogl_WarnIfError();
 }
 
 // Reads the given rectangle of pixels into data as GL_RGB / GL_UNSIGNED_BYTE
 // using glReadPixels. The size of the destination is not checked here, so
 // the caller has to provide enough space.
 void CDeviceCommandContext::ReadbackFramebufferSync(
 	const uint32_t x, const uint32_t y, const uint32_t width, const uint32_t height,
 	void* data)
 {
 	ENSURE(m_Framebuffer);
 	glReadPixels(x, y, width, height, GL_RGB, GL_UNSIGNED_BYTE, data);
 	ogl_WarnIfError();
 }
 
 // Enables the scissor test with scissors[0] (scissorCount == 1) or disables
 // it (scissorCount == 0). GL is only touched when the count or the rectangle
 // differs from the cached values. At most one scissor is supported.
 void CDeviceCommandContext::SetScissors(const uint32_t scissorCount, const Rect* scissors)
 {
 	ENSURE(scissorCount <= 1);
 	const bool countChanged = m_ScissorCount != scissorCount;
 	if (scissorCount != 0)
 	{
 		if (countChanged)
 			glEnable(GL_SCISSOR_TEST);
 		ENSURE(scissors);
 		// The rectangle is re-applied when scissoring has just been enabled
 		// or when the rectangle itself changed.
 		if (countChanged || m_Scissors[0] != scissors[0])
 		{
 			m_Scissors[0] = scissors[0];
 			const auto& rect = m_Scissors[0];
 			glScissor(rect.x, rect.y, rect.width, rect.height);
 		}
 	}
 	else if (countChanged)
 	{
 		glDisable(GL_SCISSOR_TEST);
 	}
 	ogl_WarnIfError();
 	m_ScissorCount = scissorCount;
 }
 
 void CDeviceCommandContext::SetViewports(const uint32_t viewportCount, const Rect* viewports)
 {
 	ENSURE(viewportCount == 1);
 	glViewport(viewports[0].x, viewports[0].y, viewports[0].width, viewports[0].height);
 	ogl_WarnIfError();
 }
 
 // Records how the given vertex stream is laid out and which binding slot
 // feeds it. Nothing is sent to GL here; the description is consumed when a
 // vertex buffer (or client data) is set for bindingSlot. Streams that are
 // not active are ignored.
 void CDeviceCommandContext::SetVertexAttributeFormat(
 	const VertexAttributeStream stream,
 	const Format format,
 	const uint32_t offset,
 	const uint32_t stride,
 	const VertexAttributeRate rate,
 	const uint32_t bindingSlot)
 {
 	const uint32_t index = static_cast<uint32_t>(stream);
 	ENSURE(index < m_VertexAttributeFormat.size());
 	ENSURE(bindingSlot < m_VertexAttributeFormat.size());
 
 	auto& attribute = m_VertexAttributeFormat[index];
 	if (!attribute.active)
 		return;
 
 	attribute.format = format;
 	attribute.offset = offset;
 	attribute.stride = stride;
 	attribute.rate = rate;
 	attribute.bindingSlot = bindingSlot;
 
 	// From now on the stream may be used by SetVertexBuffer(Data).
 	attribute.initialized = true;
 }
 
 // Binds a vertex buffer and points every active vertex stream assigned to
 // bindingSlot at it. offset is added to each stream's own offset. Requires
 // a shader program to be set and the affected streams to have a format.
 void CDeviceCommandContext::SetVertexBuffer(
 	const uint32_t bindingSlot, IBuffer* buffer, const uint32_t offset)
 {
 	ENSURE(buffer);
 	ENSURE(buffer->GetType() == IBuffer::Type::VERTEX);
 	ENSURE(m_ShaderProgram);
 	BindBuffer(buffer->GetType(), buffer->As<CBuffer>());
 	for (size_t streamIndex = 0; streamIndex < m_VertexAttributeFormat.size(); ++streamIndex)
 	{
 		const auto& attribute = m_VertexAttributeFormat[streamIndex];
 		const bool usesSlot = attribute.active && attribute.bindingSlot == bindingSlot;
 		if (!usesSlot)
 			continue;
 		ENSURE(attribute.initialized);
 		// With a buffer bound the data comes from the buffer, so no client
 		// pointer is passed.
 		m_ShaderProgram->VertexAttribPointer(
 			static_cast<VertexAttributeStream>(streamIndex),
 			attribute.format,
 			attribute.offset + offset,
 			attribute.stride,
 			attribute.rate,
 			nullptr);
 	}
 }
 
 // Feeds every active vertex stream assigned to bindingSlot from client
 // memory instead of a vertex buffer. Any bound vertex buffer is unbound
 // first. Requires a shader program to be set.
 void CDeviceCommandContext::SetVertexBufferData(
 	const uint32_t bindingSlot, const void* data, const uint32_t dataSize)
 {
 	ENSURE(data);
 	ENSURE(m_ShaderProgram);
 	ENSURE(dataSize > 0);
 	BindBuffer(CBuffer::Type::VERTEX, nullptr);
 	for (size_t streamIndex = 0; streamIndex < m_VertexAttributeFormat.size(); ++streamIndex)
 	{
 		const auto& attribute = m_VertexAttributeFormat[streamIndex];
 		const bool usesSlot = attribute.active && attribute.bindingSlot == bindingSlot;
 		if (!usesSlot)
 			continue;
 		ENSURE(attribute.initialized);
 		// We don't know how many vertices will be used in a draw command, so we
 		// assume at least one vertex.
 		ENSURE(dataSize >= attribute.offset + attribute.stride);
 		m_ShaderProgram->VertexAttribPointer(
 			static_cast<VertexAttributeStream>(streamIndex),
 			attribute.format,
 			attribute.offset,
 			attribute.stride,
 			attribute.rate,
 			data);
 	}
 }
 
 // Makes buffer the source of indices for subsequent indexed draws and binds
 // it. Any previously set client-side index data is dropped.
 void CDeviceCommandContext::SetIndexBuffer(IBuffer* buffer)
 {
 	// Validate the pointer before it is dereferenced, as SetVertexBuffer does.
 	ENSURE(buffer);
 	ENSURE(buffer->GetType() == CBuffer::Type::INDEX);
 	m_IndexBuffer = buffer->As<CBuffer>();
 	m_IndexBufferData = nullptr;
 	BindBuffer(CBuffer::Type::INDEX, m_IndexBuffer);
 }
 
 // Makes client memory the source of indices for subsequent indexed draws.
 // A previously set index buffer is unbound first. dataSize is only checked
 // to be non-zero; the pointer itself is stored as is.
 void CDeviceCommandContext::SetIndexBufferData(const void* data, const uint32_t dataSize)
 {
 	ENSURE(dataSize > 0);
 	if (m_IndexBuffer)
 	{
 		BindBuffer(CBuffer::Type::INDEX, nullptr);
 		m_IndexBuffer = nullptr;
 	}
 	// Assigned last: BindBuffer above resets m_IndexBufferData for INDEX.
 	m_IndexBufferData = data;
 }
 
 // Marks the start of a pass. Draw commands require an open pass, and the
 // pipeline state cannot be changed while one is open. Passes do not nest.
 void CDeviceCommandContext::BeginPass()
 {
 	ENSURE(!m_InsidePass);
 	m_InsidePass = true;
 }
 
 // Marks the end of the pass opened by BeginPass.
 void CDeviceCommandContext::EndPass()
 {
 	ENSURE(m_InsidePass);
 	m_InsidePass = false;
 }
 
 // Draws vertexCount vertices as triangles, starting at firstVertex. Requires
 // a shader program and an open pass. A zero vertex count is a no-op.
 void CDeviceCommandContext::Draw(
 	const uint32_t firstVertex, const uint32_t vertexCount)
 {
 	ENSURE(m_ShaderProgram);
 	ENSURE(m_InsidePass);
 	// Some drivers apparently don't like count = 0 in glDrawArrays here, so skip
 	// all drawing in that case.
 	if (vertexCount == 0)
 		return;
 	m_ShaderProgram->AssertPointersBound();
 	glDrawArrays(GL_TRIANGLES, firstVertex, vertexCount);
 	ogl_WarnIfError();
 }
 
 // Draws indexCount 16-bit indices as triangles, starting at firstIndex, from
 // the current index buffer or client-side index data. Requires a shader
 // program and an open pass; only a zero vertexOffset is supported. A zero
 // index count is a no-op.
 void CDeviceCommandContext::DrawIndexed(
 	const uint32_t firstIndex, const uint32_t indexCount, const int32_t vertexOffset)
 {
 	ENSURE(m_ShaderProgram);
 	ENSURE(m_InsidePass);
 	if (indexCount == 0)
 		return;
 	ENSURE(m_IndexBuffer || m_IndexBufferData);
 	ENSURE(vertexOffset == 0);
 	if (m_IndexBuffer)
 	{
 		// The requested index range has to fit into the bound index buffer.
 		ENSURE(sizeof(uint16_t) * (firstIndex + indexCount) <= m_IndexBuffer->GetSize());
 	}
 	m_ShaderProgram->AssertPointersBound();
 	// Don't use glMultiDrawElements here since it doesn't have a significant
 	// performance impact and it suffers from various driver bugs (e.g. it breaks
 	// in Mesa 7.10 swrast with index VBOs).
 	// m_IndexBufferData is null when an index buffer is bound, so the value
 	// passed is then just the byte offset of the first index.
 	const uint8_t* const indexBase = static_cast<const uint8_t*>(m_IndexBufferData);
 	const void* const indices = static_cast<const void*>(indexBase + sizeof(uint16_t) * firstIndex);
 	glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT, indices);
 	ogl_WarnIfError();
 }
 
 // Draws instanceCount instances of vertexCount vertices as triangles.
 // Requires the instancing capability, a shader program and an open pass;
 // only a zero firstInstance is supported. Zero vertices or zero instances
 // is a no-op. No draw call is issued in GLES builds.
 void CDeviceCommandContext::DrawInstanced(
 	const uint32_t firstVertex, const uint32_t vertexCount,
 	const uint32_t firstInstance, const uint32_t instanceCount)
 {
 	ENSURE(m_Device->GetCapabilities().instancing);
 	ENSURE(m_ShaderProgram);
 	ENSURE(m_InsidePass);
 	if (vertexCount == 0 || instanceCount == 0)
 		return;
 	ENSURE(firstInstance == 0);
 	m_ShaderProgram->AssertPointersBound();
 #if CONFIG2_GLES
 	// Instancing is expected to be reported as unavailable in GLES builds.
 	ENSURE(!m_Device->GetCapabilities().instancing);
 	UNUSED2(firstVertex);
 	UNUSED2(vertexCount);
 	UNUSED2(instanceCount);
 #else
 	glDrawArraysInstancedARB(GL_TRIANGLES, firstVertex, vertexCount, instanceCount);
 #endif
 	ogl_WarnIfError();
 }
 
 // Draws instanceCount instances of indexCount 16-bit indices as triangles,
 // starting at firstIndex. Requires the instancing capability, a shader
 // program, an open pass and index data; only zero firstInstance and zero
 // vertexOffset are supported. A zero index count is a no-op. No draw call is
 // issued in GLES builds.
 void CDeviceCommandContext::DrawIndexedInstanced(
 	const uint32_t firstIndex, const uint32_t indexCount,
 	const uint32_t firstInstance, const uint32_t instanceCount,
 	const int32_t vertexOffset)
 {
 	ENSURE(m_Device->GetCapabilities().instancing);
 	ENSURE(m_ShaderProgram);
 	ENSURE(m_InsidePass);
 	ENSURE(m_IndexBuffer || m_IndexBufferData);
 	if (indexCount == 0)
 		return;
 	ENSURE(firstInstance == 0 && vertexOffset == 0);
 	if (m_IndexBuffer)
 	{
 		// The requested index range has to fit into the bound index buffer.
 		ENSURE(sizeof(uint16_t) * (firstIndex + indexCount) <= m_IndexBuffer->GetSize());
 	}
 	m_ShaderProgram->AssertPointersBound();
 	// Don't use glMultiDrawElements here since it doesn't have a significant
 	// performance impact and it suffers from various driver bugs (e.g. it breaks
 	// in Mesa 7.10 swrast with index VBOs).
 #if CONFIG2_GLES
 	// Instancing is expected to be reported as unavailable in GLES builds.
 	ENSURE(!m_Device->GetCapabilities().instancing);
 	UNUSED2(indexCount);
 	UNUSED2(firstIndex);
 	UNUSED2(instanceCount);
 #else
 	// m_IndexBufferData is null when an index buffer is bound, so the pointer
 	// argument is then just the byte offset of the first index.
 	glDrawElementsInstancedARB(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT,
 		static_cast<const void*>((static_cast<const uint8_t*>(m_IndexBufferData) + sizeof(uint16_t) * firstIndex)),
 		instanceCount);
 #endif
 	ogl_WarnIfError();
 }
 
 // Draws indexCount 16-bit indices as triangles, starting at firstIndex, and
 // passes start/end to glDrawRangeElementsEXT as the index value range. In
 // GLES builds start and end are ignored and plain glDrawElements is used.
 // Requires a shader program, an open pass and index data. A zero index
 // count is a no-op.
 void CDeviceCommandContext::DrawIndexedInRange(
 	const uint32_t firstIndex, const uint32_t indexCount,
 	const uint32_t start, const uint32_t end)
 {
 	ENSURE(m_ShaderProgram);
 	ENSURE(m_InsidePass);
 	if (indexCount == 0)
 		return;
 	ENSURE(m_IndexBuffer || m_IndexBufferData);
 	// m_IndexBufferData is null when an index buffer is bound, so the pointer
 	// is then just the byte offset of the first index.
 	const void* indices =
 		static_cast<const void*>((static_cast<const uint8_t*>(m_IndexBufferData) + sizeof(uint16_t) * firstIndex));
 	m_ShaderProgram->AssertPointersBound();
 	// Draw with DrawRangeElements where available, since it might be more
 	// efficient for slow hardware.
 #if CONFIG2_GLES
 	UNUSED2(start);
 	UNUSED2(end);
 	glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT, indices);
 #else
 	glDrawRangeElementsEXT(GL_TRIANGLES, start, end, indexCount, GL_UNSIGNED_SHORT, indices);
 #endif
 	ogl_WarnIfError();
 }
 
 // Binds texture to the texture unit that the current shader program
 // associates with bindingSlot. The texture must have SAMPLED usage.
 // Nothing is bound when the program reports no sampler type for the slot;
 // mismatches (non-sampler slot, non-depth texture for a shadow sampler,
 // unit out of range) are logged as errors and nothing is bound.
 void CDeviceCommandContext::SetTexture(const int32_t bindingSlot, ITexture* texture)
 {
 	ENSURE(m_ShaderProgram);
 	ENSURE(texture);
 	ENSURE(texture->GetUsage() & Renderer::Backend::ITexture::Usage::SAMPLED);
 
 	const CShaderProgram::TextureUnit textureUnit =
 		m_ShaderProgram->GetTextureUnit(bindingSlot);
 	// A zero type is silently ignored.
 	if (!textureUnit.type)
 		return;
 
 	if (textureUnit.type != GL_SAMPLER_2D &&
 #if !CONFIG2_GLES
 		textureUnit.type != GL_SAMPLER_2D_SHADOW &&
 #endif
 		textureUnit.type != GL_SAMPLER_CUBE)
 	{
 		LOGERROR("CDeviceCommandContext::SetTexture: expected sampler at binding slot");
 		return;
 	}
 
 #if !CONFIG2_GLES
 	// Shadow samplers can only sample depth textures.
 	if (textureUnit.type == GL_SAMPLER_2D_SHADOW)
 	{
 		if (!IsDepthTexture(texture->GetFormat()))
 		{
 			LOGERROR("CDeviceCommandContext::SetTexture: Invalid texture type (expected depth texture)");
 			return;
 		}
 	}
 #endif
 
 	ENSURE(textureUnit.unit >= 0);
 	const uint32_t unit = textureUnit.unit;
 	if (unit >= m_BoundTextures.size())
 	{
 		LOGERROR("CDeviceCommandContext::SetTexture: Invalid texture unit (too big)");
 		return;
 	}
 	BindTexture(unit, textureUnit.target, texture->As<CTexture>()->GetHandle());
 }
 
 // Sets a single-float uniform at bindingSlot on the current shader program.
 void CDeviceCommandContext::SetUniform(
 	const int32_t bindingSlot,
 	const float value)
 {
 	ENSURE(m_ShaderProgram);
 	m_ShaderProgram->SetUniform(bindingSlot, value);
 }
 
 // Sets a two-component float uniform at bindingSlot on the current shader
 // program.
 void CDeviceCommandContext::SetUniform(
 	const int32_t bindingSlot,
 	const float valueX, const float valueY)
 {
 	ENSURE(m_ShaderProgram);
 	m_ShaderProgram->SetUniform(bindingSlot, valueX, valueY);
 }
 
 // Sets a three-component float uniform at bindingSlot on the current shader
 // program.
 void CDeviceCommandContext::SetUniform(
 	const int32_t bindingSlot,
 	const float valueX, const float valueY,
 	const float valueZ)
 {
 	ENSURE(m_ShaderProgram);
 	m_ShaderProgram->SetUniform(bindingSlot, valueX, valueY, valueZ);
 }
 
 // Sets a four-component float uniform at bindingSlot on the current shader
 // program.
 void CDeviceCommandContext::SetUniform(
 	const int32_t bindingSlot,
 	const float valueX, const float valueY,
 	const float valueZ, const float valueW)
 {
 	ENSURE(m_ShaderProgram);
 	m_ShaderProgram->SetUniform(bindingSlot, valueX, valueY, valueZ, valueW);
 }
 
 // Sets a uniform at bindingSlot on the current shader program from a span of
 // floats; interpretation of the values is left to the shader program.
 void CDeviceCommandContext::SetUniform(
 	const int32_t bindingSlot, PS::span<const float> values)
 {
 	ENSURE(m_ShaderProgram);
 	m_ShaderProgram->SetUniform(bindingSlot, values);
 }
 
 // Remembers the currently active texture unit together with its cached
 // binding, then binds the given texture on the last texture unit (which
 // also makes that unit the active one).
 CDeviceCommandContext::ScopedBind::ScopedBind(
 	CDeviceCommandContext* deviceCommandContext,
 	const GLenum target, const GLuint handle)
 	: m_DeviceCommandContext(deviceCommandContext),
 	m_OldBindUnit(deviceCommandContext->m_BoundTextures[deviceCommandContext->m_ActiveTextureUnit]),
 	m_ActiveTextureUnit(deviceCommandContext->m_ActiveTextureUnit)
 {
 	// Temporary binds always go to the highest unit index.
 	const uint32_t unit = m_DeviceCommandContext->m_BoundTextures.size() - 1;
 	m_DeviceCommandContext->BindTexture(unit, target, handle);
 }
 
 // Re-applies the remembered binding on the remembered texture unit; going
 // through BindTexture also makes that unit the active one again.
 CDeviceCommandContext::ScopedBind::~ScopedBind()
 {
 	m_DeviceCommandContext->BindTexture(
 		m_ActiveTextureUnit, m_OldBindUnit.target, m_OldBindUnit.handle);
 }
 
 // Temporarily binds buffer on the GL target of its type, bypassing the
 // context's binding cache. If the cache says the buffer is already bound,
 // nothing is done and the destructor will not restore anything.
 CDeviceCommandContext::ScopedBufferBind::ScopedBufferBind(
 	CDeviceCommandContext* deviceCommandContext, CBuffer* buffer)
 	: m_DeviceCommandContext(deviceCommandContext)
 {
 	ENSURE(buffer);
 	m_CacheIndex = static_cast<size_t>(buffer->GetType());
 	const GLenum target = BufferTypeToGLTarget(buffer->GetType());
 	const GLuint handle = buffer->GetHandle();
 
 	const auto& cachedBinding = m_DeviceCommandContext->m_BoundBuffers[m_CacheIndex];
 	const bool alreadyBound = cachedBinding.first == target && cachedBinding.second == handle;
 	if (!alreadyBound)
 	{
 		glBindBufferARB(target, handle);
 		return;
 	}
 
 	// Use an invalid index as a sign that we don't need to restore the
 	// bound buffer.
 	m_CacheIndex = m_DeviceCommandContext->m_BoundBuffers.size();
 }
 
 // Restores the binding recorded in the context's cache for the buffer type
 // touched by the constructor, unless the constructor found nothing to do
 // (signalled by an out-of-range cache index).
 CDeviceCommandContext::ScopedBufferBind::~ScopedBufferBind()
 {
 	const auto& boundBuffers = m_DeviceCommandContext->m_BoundBuffers;
 	if (m_CacheIndex < boundBuffers.size())
 	{
 		const auto& cachedBinding = boundBuffers[m_CacheIndex];
 		glBindBufferARB(cachedBinding.first, cachedBinding.second);
 	}
 }
 
 } // namespace GL
 
 } // namespace Backend
 
 } // namespace Renderer