Index: ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp
===================================================================
--- ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp	(revision 26524)
+++ ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp	(revision 26525)
@@ -1,734 +1,809 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "DeviceCommandContext.h"
 
 #include "ps/CLogger.h"
 #include "renderer/backend/gl/Buffer.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/backend/gl/Framebuffer.h"
 #include "renderer/backend/gl/Mapping.h"
 #include "renderer/backend/gl/Texture.h"
 
 #include <algorithm>
 #include <cstring>
 #include <limits>
 
 namespace Renderer
 {
 
 namespace Backend
 {
 
 namespace GL
 {
 
 namespace
 {
 
 bool operator==(const StencilOpState& lhs, const StencilOpState& rhs)
 {
 	return
 		lhs.failOp == rhs.failOp &&
 		lhs.passOp == rhs.passOp &&
 		lhs.depthFailOp == rhs.depthFailOp &&
 		lhs.compareOp == rhs.compareOp;
 }
 bool operator!=(const StencilOpState& lhs, const StencilOpState& rhs)
 {
 	return !operator==(lhs, rhs);
 }
 
 bool operator==(
 	const CDeviceCommandContext::Rect& lhs,
 	const CDeviceCommandContext::Rect& rhs)
 {
 	return
 		lhs.x == rhs.x && lhs.y == rhs.y &&
 		lhs.width == rhs.width && lhs.height == rhs.height;
 }
 
 bool operator!=(
 	const CDeviceCommandContext::Rect& lhs,
 	const CDeviceCommandContext::Rect& rhs)
 {
 	return !operator==(lhs, rhs);
 }
 
 void ApplyDepthMask(const bool depthWriteEnabled)
 {
 	glDepthMask(depthWriteEnabled ? GL_TRUE : GL_FALSE);
 }
 
 void ApplyColorMask(const uint8_t colorWriteMask)
 {
 	glColorMask(
 		(colorWriteMask & ColorWriteMask::RED) != 0 ? GL_TRUE : GL_FALSE,
 		(colorWriteMask & ColorWriteMask::GREEN) != 0 ? GL_TRUE : GL_FALSE,
 		(colorWriteMask & ColorWriteMask::BLUE) != 0 ? GL_TRUE : GL_FALSE,
 		(colorWriteMask & ColorWriteMask::ALPHA) != 0 ? GL_TRUE : GL_FALSE);
 }
 
 void ApplyStencilMask(const uint32_t stencilWriteMask)
 {
 	glStencilMask(stencilWriteMask);
 }
 
 GLenum BufferTypeToGLTarget(const CBuffer::Type type)
 {
 	GLenum target = GL_ARRAY_BUFFER;
 	switch (type)
 	{
 	case CBuffer::Type::VERTEX:
 		target = GL_ARRAY_BUFFER;
 		break;
 	case CBuffer::Type::INDEX:
 		target = GL_ELEMENT_ARRAY_BUFFER;
 		break;
 	};
 	return target;
 }
 
 void UploadBufferRegionImpl(
 	const GLenum target, const uint32_t dataOffset, const uint32_t dataSize,
 	const CDeviceCommandContext::UploadBufferFunction& uploadFunction)
 {
-	ENSURE(dataOffset < dataSize);
+	ENSURE(dataSize > 0); // The offset is independent of the size; only an empty upload is rejected.
 	while (true)
 	{
 		void* mappedData = glMapBufferARB(target, GL_WRITE_ONLY);
 		if (mappedData == nullptr)
 		{
 			// This shouldn't happen unless we run out of virtual address space
 			LOGERROR("glMapBuffer failed");
 			break;
 		}
 
 		uploadFunction(static_cast<u8*>(mappedData) + dataOffset);
 
 		if (glUnmapBufferARB(target) == GL_TRUE)
 			break;
 
 		// Unmap might fail on e.g. resolution switches, so just try again
 		// and hope it will eventually succeed
 		LOGMESSAGE("glUnmapBuffer failed, trying again...\n");
 	}
 }
 
 } // anonymous namespace
 
 // static
 std::unique_ptr<CDeviceCommandContext> CDeviceCommandContext::Create(CDevice* device)
 {
 	std::unique_ptr<CDeviceCommandContext> deviceCommandContext(new CDeviceCommandContext(device));
 	deviceCommandContext->m_Framebuffer = device->GetCurrentBackbuffer();
 	deviceCommandContext->ResetStates();
 	return deviceCommandContext;
 }
 
 CDeviceCommandContext::CDeviceCommandContext(CDevice* device)
 	: m_Device(device)
 {
 	glActiveTexture(GL_TEXTURE0);
 	glBindTexture(GL_TEXTURE_2D, 0);
 	for (std::pair<GLenum, GLuint>& unit : m_BoundTextures)
 		unit.first = unit.second = 0;
 }
 
 CDeviceCommandContext::~CDeviceCommandContext() = default;
 
 void CDeviceCommandContext::SetGraphicsPipelineState(
 	const GraphicsPipelineStateDesc& pipelineStateDesc)
 {
 	SetGraphicsPipelineStateImpl(pipelineStateDesc, false);
 }
 
 void CDeviceCommandContext::UploadTexture(
 	CTexture* texture, const Format format,
 	const void* data, const size_t dataSize,
 	const uint32_t level, const uint32_t layer)
 {
 	UploadTextureRegion(texture, format, data, dataSize,
 		0, 0,
 		std::max(1u, texture->GetWidth() >> level),
 		std::max(1u, texture->GetHeight() >> level),
 		level, layer);
 }
 
 void CDeviceCommandContext::UploadTextureRegion(
 	CTexture* texture, const Format dataFormat,
 	const void* data, const size_t dataSize,
 	const uint32_t xOffset, const uint32_t yOffset,
 	const uint32_t width, const uint32_t height,
 	const uint32_t level, const uint32_t layer)
 {
 	ENSURE(texture);
 	ENSURE(width > 0 && height > 0);
 	if (texture->GetType() == CTexture::Type::TEXTURE_2D)
 	{
 		ENSURE(layer == 0);
 		if (texture->GetFormat() == Format::R8G8B8A8 ||
 			texture->GetFormat() == Format::R8G8B8 ||
 			texture->GetFormat() == Format::A8)
 		{
 			ENSURE(texture->GetFormat() == dataFormat);
 			size_t bytesPerPixel = 4;
 			GLenum pixelFormat = GL_RGBA;
 			switch (dataFormat)
 			{
 			case Format::R8G8B8A8:
 				break;
 			case Format::R8G8B8:
 				pixelFormat = GL_RGB;
 				bytesPerPixel = 3;
 				break;
 			case Format::A8:
 				pixelFormat = GL_ALPHA;
 				bytesPerPixel = 1;
 				break;
 			case Format::L8:
 				pixelFormat = GL_LUMINANCE;
 				bytesPerPixel = 1;
 				break;
 			default:
 				debug_warn("Unexpected format.");
 				break;
 			}
 			ENSURE(dataSize == width * height * bytesPerPixel);
 
 			ScopedBind scopedBind(this, GL_TEXTURE_2D, texture->GetHandle());
 			glTexSubImage2D(GL_TEXTURE_2D, level,
 				xOffset, yOffset, width, height,
 				pixelFormat, GL_UNSIGNED_BYTE, data);
 			ogl_WarnIfError();
 		}
 		else if (
 			texture->GetFormat() == Format::BC1_RGB ||
 			texture->GetFormat() == Format::BC1_RGBA ||
 			texture->GetFormat() == Format::BC2 ||
 			texture->GetFormat() == Format::BC3)
 		{
 			ENSURE(xOffset == 0 && yOffset == 0);
 			ENSURE(texture->GetFormat() == dataFormat);
 			// TODO: add data size check.
 
 			GLenum internalFormat = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
 			switch (texture->GetFormat())
 			{
 			case Format::BC1_RGBA:
 				internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
 				break;
 			case Format::BC2:
 				internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
 				break;
 			case Format::BC3:
 				internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
 				break;
 			default:
 				break;
 			}
 
 			ScopedBind scopedBind(this, GL_TEXTURE_2D, texture->GetHandle());
 			glCompressedTexImage2DARB(GL_TEXTURE_2D, level, internalFormat, width, height, 0, dataSize, data);
 			ogl_WarnIfError();
 		}
 		else
 			debug_warn("Unsupported format");
 	}
 	else if (texture->GetType() == CTexture::Type::TEXTURE_CUBE)
 	{
 		if (texture->GetFormat() == Format::R8G8B8A8)
 		{
 			ENSURE(texture->GetFormat() == dataFormat);
 			ENSURE(level == 0 && layer < 6);
 			ENSURE(xOffset == 0 && yOffset == 0 && texture->GetWidth() == width && texture->GetHeight() == height);
 			const size_t bpp = 4;
 			ENSURE(dataSize == width * height * bpp);
 
 			// The order of layers should be the following:
 			//   front, back, top, bottom, right, left
 			static const GLenum targets[6] =
 			{
 				GL_TEXTURE_CUBE_MAP_POSITIVE_X,
 				GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
 				GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
 				GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
 				GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
 				GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
 			};
 
 			ScopedBind scopedBind(this, GL_TEXTURE_CUBE_MAP, texture->GetHandle());
 			glTexImage2D(targets[layer], level, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
 			ogl_WarnIfError();
 		}
 		else
 			debug_warn("Unsupported format");
 	}
 	else
 		debug_warn("Unsupported type");
 }
 
 void CDeviceCommandContext::UploadBuffer(CBuffer* buffer, const void* data, const uint32_t dataSize)
 {
-	UploadBufferRegion(buffer, data, dataSize, 0);
+	UploadBufferRegion(buffer, data, 0, dataSize); // Arguments are (dataOffset, dataSize): upload from the start of the buffer.
 }
 
 void CDeviceCommandContext::UploadBuffer(
 	CBuffer* buffer, const UploadBufferFunction& uploadFunction)
 {
 	UploadBufferRegion(buffer, 0, buffer->GetSize(), uploadFunction);
 }
 
 void CDeviceCommandContext::UploadBufferRegion(
 	CBuffer* buffer, const void* data, const uint32_t dataOffset, const uint32_t dataSize)
 {
 	ENSURE(data);
 	ENSURE(dataOffset + dataSize <= buffer->GetSize());
 	const GLenum target = BufferTypeToGLTarget(buffer->GetType());
 	glBindBufferARB(target, buffer->GetHandle());
 	if (buffer->IsDynamic())
 	{
 		// Tell the driver that it can reallocate the whole VBO
 		glBufferDataARB(target, buffer->GetSize(), nullptr, buffer->IsDynamic() ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW);
 
 		// (In theory, glMapBufferRange with GL_MAP_INVALIDATE_BUFFER_BIT could be used
 		// here instead of glBufferData(..., NULL, ...) plus glMapBuffer(), but with
 		// current Intel Windows GPU drivers (as of 2015-01) it's much faster if you do
 		// the explicit glBufferData.)
 
 		UploadBufferRegion(buffer, dataOffset, dataSize, [data, dataOffset, dataSize](u8* mappedData)
 		{
 			std::memcpy(mappedData, data, dataSize);
 		});
 	}
 	else
 	{
 		glBufferSubDataARB(target, dataOffset, dataSize, data);
 	}
 	glBindBufferARB(target, 0);
 }
 
 void CDeviceCommandContext::UploadBufferRegion(
 	CBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
 	const UploadBufferFunction& uploadFunction)
 {
 	ENSURE(dataOffset + dataSize <= buffer->GetSize());
 	const GLenum target = BufferTypeToGLTarget(buffer->GetType());
 	glBindBufferARB(target, buffer->GetHandle());
 	ENSURE(buffer->IsDynamic());
 	UploadBufferRegionImpl(target, dataOffset, dataSize, uploadFunction);
 	glBindBufferARB(target, 0);
 }
 
 void CDeviceCommandContext::BeginScopedLabel(const char* name)
 {
 	if (!m_Device->GetCapabilities().debugScopedLabels)
 		return;
 
 	++m_ScopedLabelDepth;
 	glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0x0AD, -1, name);
 }
 
 void CDeviceCommandContext::EndScopedLabel()
 {
 	if (!m_Device->GetCapabilities().debugScopedLabels)
 		return;
 
 	ENSURE(m_ScopedLabelDepth > 0);
 	--m_ScopedLabelDepth;
 	glPopDebugGroup();
 }
 
 void CDeviceCommandContext::BindTexture(const uint32_t unit, const GLenum target, const GLuint handle)
 {
 	ENSURE(unit < m_BoundTextures.size());
 #if CONFIG2_GLES
 	ENSURE(target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP);
 #else
 	ENSURE(target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_2D_MULTISAMPLE);
 #endif
 	if (m_BoundTextures[unit].first == target && m_BoundTextures[unit].second == handle)
 		return;
 	if (m_ActiveTextureUnit != unit)
 	{
 		glActiveTexture(GL_TEXTURE0 + unit);
 		m_ActiveTextureUnit = unit;
 	}
 	if (m_BoundTextures[unit].first != target && m_BoundTextures[unit].first && m_BoundTextures[unit].second)
 		glBindTexture(m_BoundTextures[unit].first, 0);
 	if (m_BoundTextures[unit].second != handle)
 		glBindTexture(target, handle);
 	m_BoundTextures[unit] = {target, handle};
 }
 
 void CDeviceCommandContext::BindBuffer(const CBuffer::Type type, CBuffer* buffer)
 {
-	ENSURE(!buffer || type == buffer->GetType());
+	ENSURE(!buffer || buffer->GetType() == type); // A null buffer unbinds the target; otherwise the types must agree.
+	if (type == CBuffer::Type::INDEX)
+	{
+		if (!buffer) // Unbinding: forget the tracked index VBO.
+			m_IndexBuffer = nullptr;
+		m_IndexBufferData = nullptr; // Any bind or unbind of the index target drops the client-side index pointer.
+	}
 	glBindBufferARB(BufferTypeToGLTarget(type), buffer ? buffer->GetHandle() : 0);
 }
 
 void CDeviceCommandContext::Flush()
 {
 	ResetStates();
 
+	m_IndexBuffer = nullptr;
+	m_IndexBufferData = nullptr;
+
 	BindTexture(0, GL_TEXTURE_2D, 0);
+	BindBuffer(CBuffer::Type::INDEX, nullptr);
+	BindBuffer(CBuffer::Type::VERTEX, nullptr);
 
 	ENSURE(m_ScopedLabelDepth == 0);
 }
 
 void CDeviceCommandContext::ResetStates()
 {
 	SetGraphicsPipelineStateImpl(MakeDefaultGraphicsPipelineStateDesc(), true);
 	SetScissors(0, nullptr);
 	SetFramebuffer(m_Device->GetCurrentBackbuffer());
 }
 
 void CDeviceCommandContext::SetGraphicsPipelineStateImpl(
 	const GraphicsPipelineStateDesc& pipelineStateDesc, const bool force)
 {
 	const DepthStencilStateDesc& currentDepthStencilStateDesc = m_GraphicsPipelineStateDesc.depthStencilState;
 	const DepthStencilStateDesc& nextDepthStencilStateDesc = pipelineStateDesc.depthStencilState;
 	if (force || currentDepthStencilStateDesc.depthTestEnabled != nextDepthStencilStateDesc.depthTestEnabled)
 	{
 		if (nextDepthStencilStateDesc.depthTestEnabled)
 			glEnable(GL_DEPTH_TEST);
 		else
 			glDisable(GL_DEPTH_TEST);
 	}
 	if (force || currentDepthStencilStateDesc.depthCompareOp != nextDepthStencilStateDesc.depthCompareOp)
 	{
 		glDepthFunc(Mapping::FromCompareOp(nextDepthStencilStateDesc.depthCompareOp));
 	}
 	if (force || currentDepthStencilStateDesc.depthWriteEnabled != nextDepthStencilStateDesc.depthWriteEnabled)
 	{
 		ApplyDepthMask(nextDepthStencilStateDesc.depthWriteEnabled);
 	}
 
 	if (force || currentDepthStencilStateDesc.stencilTestEnabled != nextDepthStencilStateDesc.stencilTestEnabled)
 	{
 		if (nextDepthStencilStateDesc.stencilTestEnabled)
 			glEnable(GL_STENCIL_TEST);
 		else
 			glDisable(GL_STENCIL_TEST);
 	}
 	if (force ||
 		currentDepthStencilStateDesc.stencilFrontFace != nextDepthStencilStateDesc.stencilFrontFace ||
 		currentDepthStencilStateDesc.stencilBackFace != nextDepthStencilStateDesc.stencilBackFace)
 	{
 		if (nextDepthStencilStateDesc.stencilFrontFace == nextDepthStencilStateDesc.stencilBackFace)
 		{
 			glStencilOp(
 				Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.failOp),
 				Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.depthFailOp),
 				Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.passOp));
 		}
 		else
 		{
 			if (force || currentDepthStencilStateDesc.stencilFrontFace != nextDepthStencilStateDesc.stencilFrontFace)
 			{
 				glStencilOpSeparate(
 					GL_FRONT,
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.failOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.depthFailOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.passOp));
 			}
 			if (force || currentDepthStencilStateDesc.stencilBackFace != nextDepthStencilStateDesc.stencilBackFace)
 			{
 				glStencilOpSeparate(
 					GL_BACK,
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.failOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.depthFailOp),
 					Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.passOp));
 			}
 		}
 	}
 	if (force || currentDepthStencilStateDesc.stencilWriteMask != nextDepthStencilStateDesc.stencilWriteMask)
 	{
 		ApplyStencilMask(nextDepthStencilStateDesc.stencilWriteMask);
 	}
 	if (force ||
 		currentDepthStencilStateDesc.stencilReference != nextDepthStencilStateDesc.stencilReference ||
 		currentDepthStencilStateDesc.stencilReadMask != nextDepthStencilStateDesc.stencilReadMask ||
 		currentDepthStencilStateDesc.stencilFrontFace.compareOp != nextDepthStencilStateDesc.stencilFrontFace.compareOp ||
 		currentDepthStencilStateDesc.stencilBackFace.compareOp != nextDepthStencilStateDesc.stencilBackFace.compareOp)
 	{
 		if (nextDepthStencilStateDesc.stencilFrontFace.compareOp == nextDepthStencilStateDesc.stencilBackFace.compareOp)
 		{
 			glStencilFunc(
 				Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilFrontFace.compareOp),
 				nextDepthStencilStateDesc.stencilReference,
 				nextDepthStencilStateDesc.stencilReadMask);
 		}
 		else
 		{
 			glStencilFuncSeparate(GL_FRONT,
 				Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilFrontFace.compareOp),
 				nextDepthStencilStateDesc.stencilReference,
 				nextDepthStencilStateDesc.stencilReadMask);
 			glStencilFuncSeparate(GL_BACK,
 				Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilBackFace.compareOp),
 				nextDepthStencilStateDesc.stencilReference,
 				nextDepthStencilStateDesc.stencilReadMask);
 		}
 	}
 
 	const BlendStateDesc& currentBlendStateDesc = m_GraphicsPipelineStateDesc.blendState;
 	const BlendStateDesc& nextBlendStateDesc = pipelineStateDesc.blendState;
 	if (force || currentBlendStateDesc.enabled != nextBlendStateDesc.enabled)
 	{
 		if (nextBlendStateDesc.enabled)
 			glEnable(GL_BLEND);
 		else
 			glDisable(GL_BLEND);
 	}
 	if (force ||
 		currentBlendStateDesc.srcColorBlendFactor != nextBlendStateDesc.srcColorBlendFactor ||
 		currentBlendStateDesc.srcAlphaBlendFactor != nextBlendStateDesc.srcAlphaBlendFactor ||
 		currentBlendStateDesc.dstColorBlendFactor != nextBlendStateDesc.dstColorBlendFactor ||
 		currentBlendStateDesc.dstAlphaBlendFactor != nextBlendStateDesc.dstAlphaBlendFactor)
 	{
 		if (nextBlendStateDesc.srcColorBlendFactor == nextBlendStateDesc.srcAlphaBlendFactor &&
 			nextBlendStateDesc.dstColorBlendFactor == nextBlendStateDesc.dstAlphaBlendFactor)
 		{
 			glBlendFunc(
 				Mapping::FromBlendFactor(nextBlendStateDesc.srcColorBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.dstColorBlendFactor));
 		}
 		else
 		{
 			glBlendFuncSeparate(
 				Mapping::FromBlendFactor(nextBlendStateDesc.srcColorBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.dstColorBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.srcAlphaBlendFactor),
 				Mapping::FromBlendFactor(nextBlendStateDesc.dstAlphaBlendFactor));
 		}
 	}
 
 	if (force ||
 		currentBlendStateDesc.colorBlendOp != nextBlendStateDesc.colorBlendOp ||
 		currentBlendStateDesc.alphaBlendOp != nextBlendStateDesc.alphaBlendOp)
 	{
 		if (nextBlendStateDesc.colorBlendOp == nextBlendStateDesc.alphaBlendOp)
 		{
 			glBlendEquation(Mapping::FromBlendOp(nextBlendStateDesc.colorBlendOp));
 		}
 		else
 		{
 			glBlendEquationSeparate(
 				Mapping::FromBlendOp(nextBlendStateDesc.colorBlendOp),
 				Mapping::FromBlendOp(nextBlendStateDesc.alphaBlendOp));
 		}
 	}
 
 	if (force ||
 		currentBlendStateDesc.constant != nextBlendStateDesc.constant)
 	{
 		glBlendColor(
 			nextBlendStateDesc.constant.r,
 			nextBlendStateDesc.constant.g,
 			nextBlendStateDesc.constant.b,
 			nextBlendStateDesc.constant.a);
 	}
 
 	if (force ||
 		currentBlendStateDesc.colorWriteMask != nextBlendStateDesc.colorWriteMask)
 	{
 		ApplyColorMask(nextBlendStateDesc.colorWriteMask);
 	}
 
 	const RasterizationStateDesc& currentRasterizationStateDesc =
 		m_GraphicsPipelineStateDesc.rasterizationState;
 	const RasterizationStateDesc& nextRasterizationStateDesc =
 		pipelineStateDesc.rasterizationState;
 	if (force ||
 		currentRasterizationStateDesc.polygonMode != nextRasterizationStateDesc.polygonMode)
 	{
 #if !CONFIG2_GLES
 		glPolygonMode(
 			GL_FRONT_AND_BACK,
 			nextRasterizationStateDesc.polygonMode == PolygonMode::LINE ? GL_LINE : GL_FILL);
 #endif
 	}
 
 	if (force ||
 		currentRasterizationStateDesc.cullMode != nextRasterizationStateDesc.cullMode)
 	{
 		if (nextRasterizationStateDesc.cullMode == CullMode::NONE)
 		{
 			glDisable(GL_CULL_FACE);
 		}
 		else
 		{
 			if (force || currentRasterizationStateDesc.cullMode == CullMode::NONE)
 				glEnable(GL_CULL_FACE);
 			glCullFace(nextRasterizationStateDesc.cullMode == CullMode::FRONT ? GL_FRONT : GL_BACK);
 		}
 	}
 
 	if (force ||
 		currentRasterizationStateDesc.frontFace != nextRasterizationStateDesc.frontFace)
 	{
 		if (nextRasterizationStateDesc.frontFace == FrontFace::CLOCKWISE)
 			glFrontFace(GL_CW);
 		else
 			glFrontFace(GL_CCW);
 	}
 
 #if !CONFIG2_GLES
 	if (force ||
 		currentRasterizationStateDesc.depthBiasEnabled != nextRasterizationStateDesc.depthBiasEnabled)
 	{
 		if (nextRasterizationStateDesc.depthBiasEnabled)
 			glEnable(GL_POLYGON_OFFSET_FILL);
 		else
 			glDisable(GL_POLYGON_OFFSET_FILL);
 	}
 	if (force ||
 		currentRasterizationStateDesc.depthBiasConstantFactor != nextRasterizationStateDesc.depthBiasConstantFactor ||
 		currentRasterizationStateDesc.depthBiasSlopeFactor != nextRasterizationStateDesc.depthBiasSlopeFactor)
 	{
 		glPolygonOffset(
 			nextRasterizationStateDesc.depthBiasSlopeFactor,
 			nextRasterizationStateDesc.depthBiasConstantFactor);
 	}
 #endif
 
 	m_GraphicsPipelineStateDesc = pipelineStateDesc;
 }
 
 void CDeviceCommandContext::BlitFramebuffer(
 	CFramebuffer* destinationFramebuffer, CFramebuffer* sourceFramebuffer)
 {
 #if CONFIG2_GLES
 	UNUSED2(destinationFramebuffer);
 	UNUSED2(sourceFramebuffer);
 	debug_warn("CDeviceCommandContext::BlitFramebuffer is not implemented for GLES");
 #else
 	// Source framebuffer should not be backbuffer.
 	ENSURE( sourceFramebuffer->GetHandle() != 0);
 	ENSURE( destinationFramebuffer != sourceFramebuffer );
 	glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, sourceFramebuffer->GetHandle());
 	glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, destinationFramebuffer->GetHandle());
 	// TODO: add more check for internal formats. And currently we don't support
 	// scaling inside blit.
 	glBlitFramebufferEXT(
 		0, 0, sourceFramebuffer->GetWidth(), sourceFramebuffer->GetHeight(),
 		0, 0, sourceFramebuffer->GetWidth(), sourceFramebuffer->GetHeight(),
 		(sourceFramebuffer->GetAttachmentMask() & destinationFramebuffer->GetAttachmentMask()),
 		GL_NEAREST);
 #endif
 }
 
 void CDeviceCommandContext::ClearFramebuffer()
 {
 	ClearFramebuffer(true, true, true);
 }
 
 void CDeviceCommandContext::ClearFramebuffer(const bool color, const bool depth, const bool stencil)
 {
 	const bool needsColor = color && (m_Framebuffer->GetAttachmentMask() & GL_COLOR_BUFFER_BIT) != 0;
 	const bool needsDepth = depth && (m_Framebuffer->GetAttachmentMask() & GL_DEPTH_BUFFER_BIT) != 0;
 	const bool needsStencil = stencil && (m_Framebuffer->GetAttachmentMask() & GL_STENCIL_BUFFER_BIT) != 0;
 	GLbitfield mask = 0;
 	if (needsColor)
 	{
 		ApplyColorMask(ColorWriteMask::RED | ColorWriteMask::GREEN | ColorWriteMask::BLUE | ColorWriteMask::ALPHA);
 		glClearColor(
 			m_Framebuffer->GetClearColor().r,
 			m_Framebuffer->GetClearColor().g,
 			m_Framebuffer->GetClearColor().b,
 			m_Framebuffer->GetClearColor().a);
 		mask |= GL_COLOR_BUFFER_BIT;
 	}
 	if (needsDepth)
 	{
 		ApplyDepthMask(true);
 		mask |= GL_DEPTH_BUFFER_BIT;
 	}
 	if (needsStencil)
 	{
 		ApplyStencilMask(std::numeric_limits<uint32_t>::max());
 		mask |= GL_STENCIL_BUFFER_BIT;
 	}
 	glClear(mask);
 	if (needsColor)
 		ApplyColorMask(m_GraphicsPipelineStateDesc.blendState.colorWriteMask);
 	if (needsDepth)
 		ApplyDepthMask(m_GraphicsPipelineStateDesc.depthStencilState.depthWriteEnabled);
 	if (needsStencil)
 		ApplyStencilMask(m_GraphicsPipelineStateDesc.depthStencilState.stencilWriteMask);
 }
 
 void CDeviceCommandContext::SetFramebuffer(CFramebuffer* framebuffer)
 {
 	ENSURE(framebuffer);
 	ENSURE(framebuffer->GetHandle() == 0 || (framebuffer->GetWidth() > 0 && framebuffer->GetHeight() > 0));
 	m_Framebuffer = framebuffer;
 	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer->GetHandle());
 }
 
 void CDeviceCommandContext::SetScissors(const uint32_t scissorCount, const Rect* scissors)
 {
 	ENSURE(scissorCount <= 1);
 	if (scissorCount == 0)
 	{
 		if (m_ScissorCount != scissorCount)
 			glDisable(GL_SCISSOR_TEST);
 	}
 	else
 	{
 		if (m_ScissorCount != scissorCount)
 			glEnable(GL_SCISSOR_TEST);
 		ENSURE(scissors);
 		if (m_ScissorCount != scissorCount || m_Scissors[0] != scissors[0])
 		{
 			m_Scissors[0] = scissors[0];
 			glScissor(m_Scissors[0].x, m_Scissors[0].y, m_Scissors[0].width, m_Scissors[0].height);
 		}
 	}
 	m_ScissorCount = scissorCount;
 }
 
 void CDeviceCommandContext::SetViewports(const uint32_t viewportCount, const Rect* viewports)
 {
 	ENSURE(viewportCount == 1);
 	glViewport(viewports[0].x, viewports[0].y, viewports[0].width, viewports[0].height);
 }
 
+void CDeviceCommandContext::SetIndexBuffer(CBuffer* buffer)
+{
+	ENSURE(buffer && buffer->GetType() == CBuffer::Type::INDEX); // Assert on null instead of dereferencing it.
+	m_IndexBuffer = buffer;
+	m_IndexBufferData = nullptr; // Indices now come from the VBO, not from client memory.
+	BindBuffer(CBuffer::Type::INDEX, m_IndexBuffer);
+}
+
+void CDeviceCommandContext::SetIndexBufferData(const void* data)
+{
+	if (m_IndexBuffer) // An index VBO is currently tracked: unbind it before switching to client-side indices.
+	{
+		BindBuffer(CBuffer::Type::INDEX, nullptr);
+		m_IndexBuffer = nullptr;
+	}
+	m_IndexBufferData = data; // Stored as-is (not copied); the draw calls read indices through this pointer.
+}
+
+void CDeviceCommandContext::Draw(
+	const uint32_t firstVertex, const uint32_t vertexCount)
+{
+	// Some drivers apparently don't like count = 0 in glDrawArrays here, so skip
+	// all drawing in that case.
+	if (vertexCount == 0)
+		return;
+	glDrawArrays(GL_TRIANGLES, firstVertex, vertexCount);
+}
+
+void CDeviceCommandContext::DrawIndexed(
+	const uint32_t firstIndex, const uint32_t indexCount, const int32_t vertexOffset)
+{
+	if (indexCount == 0)
+		return;
+	ENSURE(m_IndexBuffer || m_IndexBufferData);
+	ENSURE(vertexOffset == 0);
+	// Widen before adding: firstIndex + indexCount can wrap around in 32 bits
+	// and let an out-of-range request slip past the index VBO bounds check.
+	ENSURE(!m_IndexBuffer || sizeof(uint16_t) * (static_cast<uint64_t>(firstIndex) + indexCount) <= m_IndexBuffer->GetSize());
+	// SetIndexBuffer clears m_IndexBufferData, so with an index VBO the pointer below is only a byte offset.
+	// Don't use glMultiDrawElements here since it doesn't have a significant
+	// performance impact and it suffers from various driver bugs (e.g. it breaks
+	// in Mesa 7.10 swrast with index VBOs).
+	glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT,
+		static_cast<const void*>((static_cast<const uint8_t*>(m_IndexBufferData) + sizeof(uint16_t) * firstIndex)));
+}
+
+void CDeviceCommandContext::DrawIndexedInRange(
+	const uint32_t firstIndex, const uint32_t indexCount,
+	const uint32_t start, const uint32_t end)
+{
+	if (indexCount == 0)
+		return;
+	ENSURE(m_IndexBuffer || m_IndexBufferData);
+	// An index VBO must be large enough for the requested index range (DrawIndexed validates this too).
+	ENSURE(!m_IndexBuffer || sizeof(uint16_t) * (static_cast<uint64_t>(firstIndex) + indexCount) <= m_IndexBuffer->GetSize());
+	const void* indices = static_cast<const uint8_t*>(m_IndexBufferData) + sizeof(uint16_t) * firstIndex;
+	// Prefer DrawRangeElements where available (it might be more efficient for slow hardware); GLES ignores start/end.
+#if CONFIG2_GLES
+	glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT, indices);
+#else
+	glDrawRangeElementsEXT(GL_TRIANGLES, start, end, indexCount, GL_UNSIGNED_SHORT, indices);
+#endif
+}
+
 CDeviceCommandContext::ScopedBind::ScopedBind(
 	CDeviceCommandContext* deviceCommandContext,
 	const GLenum target, const GLuint handle)
 	: m_DeviceCommandContext(deviceCommandContext),
 	m_OldBindUnit(deviceCommandContext->m_BoundTextures[deviceCommandContext->m_ActiveTextureUnit])
 {
 	m_DeviceCommandContext->BindTexture(
 		m_DeviceCommandContext->m_ActiveTextureUnit, target, handle);
 }
 
 CDeviceCommandContext::ScopedBind::~ScopedBind()
 {
 	m_DeviceCommandContext->BindTexture(
 		m_DeviceCommandContext->m_ActiveTextureUnit, m_OldBindUnit.first, m_OldBindUnit.second);
 }
 
 } // namespace GL
 
 } // namespace Backend
 
 } // namespace Renderer
Index: ps/trunk/source/graphics/Canvas2D.cpp
===================================================================
--- ps/trunk/source/graphics/Canvas2D.cpp	(revision 26524)
+++ ps/trunk/source/graphics/Canvas2D.cpp	(revision 26525)
@@ -1,331 +1,332 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "Canvas2D.h"
 
 #include "graphics/Color.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/TextRenderer.h"
 #include "graphics/TextureManager.h"
 #include "gui/GUIMatrix.h"
 #include "maths/Rect.h"
 #include "maths/Vector2D.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/Renderer.h"
 
 #include <array>
 
 namespace
 {
 
 // Array of 2D elements unrolled into 1D array.
 using PlaneArray2D = std::array<float, 12>;
 
 inline void DrawTextureImpl(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, const CTexturePtr& texture,
 	const PlaneArray2D& vertices, PlaneArray2D uvs,
 	const CColor& multiply, const CColor& add, const float grayscaleFactor)
 {
 	texture->UploadBackendTextureIfNeeded(deviceCommandContext);
 	shader->BindTexture(str_tex, texture->GetBackendTexture());
 	for (size_t idx = 0; idx < uvs.size(); idx += 2)
 	{
 		if (texture->GetWidth() > 0.0f)
 			uvs[idx + 0] /= texture->GetWidth();
 		if (texture->GetHeight() > 0.0f)
 			uvs[idx + 1] /= texture->GetHeight();
 	}
 
 	shader->Uniform(str_colorAdd, add);
 	shader->Uniform(str_colorMul, multiply);
 	shader->Uniform(str_grayscaleFactor, grayscaleFactor);
 	shader->VertexPointer(2, GL_FLOAT, 0, vertices.data());
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, uvs.data());
 	shader->AssertPointersBound();
 
-	glDrawArrays(GL_TRIANGLES, 0, vertices.size() / 2);
+	deviceCommandContext->Draw(0, vertices.size() / 2);
 }
 
 } // anonymous namespace
 
 class CCanvas2D::Impl
 {
 public:
 	Impl(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 		: DeviceCommandContext(deviceCommandContext)
 	{
 	}
 
 	void BindTechIfNeeded()
 	{
 		if (Tech)
 			return;
 
 		CShaderDefines defines;
 		Tech = g_Renderer.GetShaderManager().LoadEffect(str_canvas2d, defines);
 		ENSURE(Tech);
 		Tech->BeginPass();
 		DeviceCommandContext->SetGraphicsPipelineState(
 			Tech->GetGraphicsPipelineStateDesc());
 		const CShaderProgramPtr& shader = Tech->GetShader();
 		shader->Uniform(str_transform, GetDefaultGuiMatrix());
 	}
 
 	void UnbindTech()
 	{
 		if (!Tech)
 			return;
 
 		Tech->EndPass();
 		Tech.reset();
 	}
 
 	Renderer::Backend::GL::CDeviceCommandContext* DeviceCommandContext;
 	CShaderTechniquePtr Tech;
 };
 
 CCanvas2D::CCanvas2D(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 	: m(std::make_unique<Impl>(deviceCommandContext))
 {
 
 }
 
 CCanvas2D::~CCanvas2D()
 {
 	Flush();
 }
 
 // Draws a line of the given width and color through the given points.
 // The points are first thinned out so that consecutive used points are at
 // least the full (padded) line width apart. Then an indexed triangle mesh made
 // of three-vertex cross-sections is built and drawn with the alpha gradient
 // texture. Nothing is drawn when points is empty or when fewer than two
 // usable points remain after thinning.
 void CCanvas2D::DrawLine(const std::vector<CVector2D>& points, const float width, const CColor& color)
 {
 	if (points.empty())
 		return;
 
 	// We could reuse the terrain line building, but it uses 3D space instead of
 	// 2D. So it can be less optimal for a canvas.
 
 	// Adding a single pixel line with alpha gradient to reduce the aliasing
 	// effect.
 	const float halfWidth = width * 0.5f + 1.0f;
 
 	struct PointIndex
 	{
 		// Index into points.
 		size_t index;
 		// Distance from this point to the next used point.
 		float length;
 		// Unit vector perpendicular to the direction towards the next used point.
 		CVector2D normal;
 	};
 	// Normal for the last index is undefined.
 	std::vector<PointIndex> pointsIndices;
 	pointsIndices.reserve(points.size());
 	pointsIndices.emplace_back(PointIndex{0, 0.0f, CVector2D()});
 	for (size_t index = 0; index < points.size();)
 	{
 		size_t nextIndex = index + 1;
 		CVector2D direction;
 		float length = 0.0f;
 		// Skip ahead until a point is found that is at least 2 * halfWidth away
 		// from the last used point.
 		while (nextIndex < points.size())
 		{
 			direction = points[nextIndex] - points[pointsIndices.back().index];
 			length = direction.Length();
 			if (length >= halfWidth * 2.0f)
 			{
 				direction /= length;
 				break;
 			}
 			++nextIndex;
 		}
 		// No sufficiently distant point is left.
 		if (nextIndex == points.size())
 			break;
 		pointsIndices.back().length = length;
 		pointsIndices.back().normal = CVector2D(-direction.Y, direction.X);
 		pointsIndices.emplace_back(PointIndex{nextIndex, 0.0f, CVector2D()});
 		index = nextIndex;
 	}
 
 	if (pointsIndices.size() <= 1)
 		return;
 
 	// Each element of vertices/uvs is one cross-section of the line: the two
 	// outer edges and the point in the middle between them.
 	std::vector<std::array<CVector2D, 3>> vertices;
 	std::vector<std::array<CVector2D, 3>> uvs;
 	std::vector<u16> indices;
 	// Only a capacity hint: the loop below may add up to three cross-sections
 	// per joint.
 	const size_t reserveSize = 2 * pointsIndices.size() - 1;
 	vertices.reserve(reserveSize);
 	uvs.reserve(reserveSize);
 	indices.reserve(reserveSize * 12);
 
 	// Appends the cross-section (p1, middle, p2). If a previous cross-section
 	// exists, twelve indices (four triangles) connecting it to the new one are
 	// appended first.
 	auto addVertices = [&vertices, &uvs, &indices, &halfWidth](const CVector2D& p1, const CVector2D& p2)
 	{
 		if (!vertices.empty())
 		{
 			// Index of the last vertex of the previous cross-section; the new
 			// cross-section occupies lastVertexIndex + 1 .. lastVertexIndex + 3.
 			const u16 lastVertexIndex = static_cast<u16>(vertices.size() * 3 - 1);
 			ENSURE(lastVertexIndex >= 2);
 			// First vertical half of the segment.
 			indices.emplace_back(lastVertexIndex - 2);
 			indices.emplace_back(lastVertexIndex - 1);
 			indices.emplace_back(lastVertexIndex + 2);
 			indices.emplace_back(lastVertexIndex - 2);
 			indices.emplace_back(lastVertexIndex + 2);
 			indices.emplace_back(lastVertexIndex + 1);
 			// Second vertical half of the segment.
 			indices.emplace_back(lastVertexIndex - 1);
 			indices.emplace_back(lastVertexIndex);
 			indices.emplace_back(lastVertexIndex + 3);
 			indices.emplace_back(lastVertexIndex - 1);
 			indices.emplace_back(lastVertexIndex + 3);
 			indices.emplace_back(lastVertexIndex + 2);
 		}
 		vertices.emplace_back(std::array<CVector2D, 3>{p1, (p1 + p2) / 2.0f, p2});
 		// Both outer vertices get u = 0 and the middle vertex gets
 		// max(1, halfWidth - 1); v is always 0.
 		uvs.emplace_back(std::array<CVector2D, 3>{
 			CVector2D(0.0f, 0.0f),
 			CVector2D(std::max(1.0f, halfWidth - 1.0f), 0.0f),
 			CVector2D(0.0f, 0.0f)});
 	};
 
 	// Cross-section at the first used point.
 	addVertices(
 		points[pointsIndices.front().index] - pointsIndices.front().normal * halfWidth,
 		points[pointsIndices.front().index] + pointsIndices.front().normal * halfWidth);
 	// For each pair of adjacent segments we need to add smooth transition.
 	for (size_t index = 0; index + 2 < pointsIndices.size(); ++index)
 	{
 		const PointIndex& pointIndex = pointsIndices[index];
 		const PointIndex& nextPointIndex = pointsIndices[index + 1];
 		// Angle between adjacent segments.
 		const float cosAlpha = pointIndex.normal.Dot(nextPointIndex.normal);
 		constexpr float EPS = 1e-3f;
 		// Use a simple segment if adjacent segments are almost codirectional.
 		if (cosAlpha > 1.0f - EPS)
 		{
 			// NOTE(review): this branch places the cross-section at
 			// points[pointIndex.index], while the other branch uses
 			// points[nextPointIndex.index] - confirm that this is intended.
 			addVertices(
 				points[pointIndex.index] - pointIndex.normal * halfWidth,
 				points[pointIndex.index] + pointIndex.normal * halfWidth);
 		}
 		else
 		{
 			// Three cross-sections at the joint: one with the normal of the
 			// incoming segment, one with the blended normal and one with the
 			// normal of the outgoing segment.
 			addVertices(
 				points[nextPointIndex.index] - pointIndex.normal * halfWidth,
 				points[nextPointIndex.index] + pointIndex.normal * halfWidth);
 			// Average normal between adjacent segments. We might want to rotate it but
 			// for now we assume that it's enough for current line widths.
 			// When the normals are almost opposite, their average is close to
 			// zero, so the first normal rotated by 90 degrees is used instead.
 			const CVector2D normal = cosAlpha < -1.0f + EPS
 				? CVector2D(pointIndex.normal.Y, -pointIndex.normal.X)
 				: ((pointIndex.normal + nextPointIndex.normal) / 2.0f).Normalized();
 			addVertices(
 				points[nextPointIndex.index] - normal * halfWidth,
 				points[nextPointIndex.index] + normal * halfWidth);
 			addVertices(
 				points[nextPointIndex.index] - nextPointIndex.normal * halfWidth,
 				points[nextPointIndex.index] + nextPointIndex.normal * halfWidth);
 		}
 		// We use 16-bit indices, it means that we can't use more than 64K vertices.
 		const size_t requiredFreeSpace = 3 * 4;
 		if (vertices.size() * 3 + requiredFreeSpace >= 65536)
 			break;
 	}
 	// Final cross-section at the last used point, using the normal of the last
 	// segment.
 	// NOTE(review): this is also reached after the early break above, in which
 	// case the mesh jumps directly to the last used point - confirm intended.
 	addVertices(
 		points[pointsIndices.back().index] - pointsIndices[pointsIndices.size() - 2].normal * halfWidth,
 		points[pointsIndices.back().index] + pointsIndices[pointsIndices.size() - 2].normal * halfWidth);
 
 	m->BindTechIfNeeded();
 
 	const CShaderProgramPtr& shader = m->Tech->GetShader();
 	shader->BindTexture(str_tex, g_Renderer.GetTextureManager().GetAlphaGradientTexture()->GetBackendTexture());
 	shader->Uniform(str_colorAdd, CColor(0.0f, 0.0f, 0.0f, 0.0f));
 	shader->Uniform(str_colorMul, color);
 	shader->Uniform(str_grayscaleFactor, 0.0f);
 	shader->VertexPointer(2, GL_FLOAT, 0, vertices.data());
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, uvs.data());
 	shader->AssertPointersBound();
 
-	glDrawElements(GL_TRIANGLES, indices.size(), GL_UNSIGNED_SHORT, indices.data());
+	m->DeviceCommandContext->SetIndexBufferData(indices.data());
+	m->DeviceCommandContext->DrawIndexed(0, indices.size(), 0);
 }
 
 // Draws the given rectangle as two triangles using the transparent texture of
 // the texture manager. All texture coordinates are zero. A zero color is
 // passed in the argument slot where DrawTexture passes its multiply color,
 // and the requested color in the slot where DrawTexture passes its add color.
 void CCanvas2D::DrawRect(const CRect& rect, const CColor& color)
 {
 	const PlaneArray2D uvs
 	{
 		0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
 		0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
 	};
 	// Six (x, y) pairs forming two triangles that cover the rectangle.
 	const PlaneArray2D vertices =
 	{
 		rect.left, rect.bottom,
 		rect.right, rect.bottom,
 		rect.right, rect.top,
 		rect.left, rect.bottom,
 		rect.right, rect.top,
 		rect.left, rect.top
 	};
 
 	m->BindTechIfNeeded();
 	DrawTextureImpl(
 		m->DeviceCommandContext, m->Tech->GetShader(),
 		g_Renderer.GetTextureManager().GetTransparentTexture(),
 		vertices, uvs, CColor(0.0f, 0.0f, 0.0f, 0.0f), color, 0.0f);
 }
 
 // Convenience overload: draws the whole texture into destination. The source
 // rectangle spans (0, 0) to the texture's width and height, the multiply
 // color is opaque white, the add color is zero and the grayscale factor is 0.
 void CCanvas2D::DrawTexture(CTexturePtr texture, const CRect& destination)
 {
 	DrawTexture(texture,
 		destination, CRect(0, 0, texture->GetWidth(), texture->GetHeight()),
 		CColor(1.0f, 1.0f, 1.0f, 1.0f), CColor(0.0f, 0.0f, 0.0f, 0.0f), 0.0f);
 }
 
 // Draws the source rectangle of the texture into the destination rectangle as
 // two triangles. The multiply color, add color and grayscale factor are
 // forwarded unchanged to DrawTextureImpl.
 void CCanvas2D::DrawTexture(
 	CTexturePtr texture, const CRect& destination, const CRect& source,
 	const CColor& multiply, const CColor& add, const float grayscaleFactor)
 {
 	// The source rectangle coordinates are passed on as they are; any
 	// normalization is not done in this function.
 	const PlaneArray2D uvs =
 	{
 		source.left, source.bottom,
 		source.right, source.bottom,
 		source.right, source.top,
 		source.left, source.bottom,
 		source.right, source.top,
 		source.left, source.top
 	};
 	// Same corner order as the uvs above, so each vertex pairs with its uv.
 	const PlaneArray2D vertices =
 	{
 		destination.left, destination.bottom,
 		destination.right, destination.bottom,
 		destination.right, destination.top,
 		destination.left, destination.bottom,
 		destination.right, destination.top,
 		destination.left, destination.top
 	};
 
 	m->BindTechIfNeeded();
 	DrawTextureImpl(m->DeviceCommandContext, m->Tech->GetShader(),
 		texture, vertices, uvs, multiply, add, grayscaleFactor);
 }
 
 // Renders the content of the given text renderer with the canvas technique:
 // the grayscale factor is set to 0 and the default GUI matrix is passed as
 // the transform.
 void CCanvas2D::DrawText(CTextRenderer& textRenderer)
 {
 	m->BindTechIfNeeded();
 
 	const CShaderProgramPtr& shader = m->Tech->GetShader();
 	shader->Uniform(str_grayscaleFactor, 0.0f);
 
 	textRenderer.Render(m->DeviceCommandContext, shader, GetDefaultGuiMatrix());
 }
 
 // Unbinds the technique held by the implementation, if any.
 void CCanvas2D::Flush()
 {
 	m->UnbindTech();
 }
Index: ps/trunk/source/graphics/LOSTexture.cpp
===================================================================
--- ps/trunk/source/graphics/LOSTexture.cpp	(revision 26524)
+++ ps/trunk/source/graphics/LOSTexture.cpp	(revision 26525)
@@ -1,428 +1,428 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "LOSTexture.h"
 
 #include "graphics/ShaderManager.h"
 #include "lib/bits.h"
 #include "lib/config2.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/TimeManager.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/components/ICmpRangeManager.h"
 #include "simulation2/helpers/Los.h"
 
 /*
 
 The LOS bitmap is computed with one value per LOS vertex, based on
 CCmpRangeManager's visibility information.
 
 The bitmap is then blurred using an NxN filter (in particular a
 7-tap Binomial filter as an efficient integral approximation of a Gaussian).
 To implement the blur efficiently without using extra memory for a second copy
 of the bitmap, we generate the bitmap with (N-1)/2 pixels of padding on each side,
 then the blur shifts the image back into the corner.
 
 The blurred bitmap is then uploaded into a GL texture for use by the renderer.
 
 */
 
 
 // Blur with a NxN filter, where N = g_BlurSize must be an odd number.
 // Keep it in relation to the number of impassable tiles in MAP_EDGE_TILES.
 static const size_t g_BlurSize = 7;
 
 // Alignment (in bytes) of the pixel data passed into texture uploading.
 // This must be a multiple of GL_UNPACK_ALIGNMENT, which ought to be 1 (since
 // that's what we set it to) but in some weird cases appears to have a different
 // value. (See Trac #2594). Multiples of 4 are possibly good for performance anyway.
 static const size_t g_SubTextureAlignment = 4;
 
 // Stores the simulation reference. The smoothing shader is created right away
 // only when the renderer is initialised and smooth LOS is enabled.
 CLOSTexture::CLOSTexture(CSimulation2& simulation)
 	: m_Simulation(simulation)
 {
 	if (CRenderer::IsInitialised() && g_RenderingOptions.GetSmoothLOS())
 		CreateShader();
 }
 
 // Releases both smoothing framebuffers first, then the textures if the main
 // texture exists.
 CLOSTexture::~CLOSTexture()
 {
 	m_SmoothFramebuffers[0].reset();
 	m_SmoothFramebuffers[1].reset();
 
 	if (m_Texture)
 		DeleteTexture();
 }
 
 // Loads the LOS interpolation effect and records in m_ShaderInitialized
 // whether both the technique and its shader are available. Should be run only
 // once. On failure an error is logged, smooth LOS is disabled in the rendering
 // options and false is returned; otherwise true is returned.
 bool CLOSTexture::CreateShader()
 {
 	m_SmoothTech = g_Renderer.GetShaderManager().LoadEffect(str_los_interp);
 	// NOTE(review): GetShader() is called before m_SmoothTech is checked below;
 	// if LoadEffect can return an empty pointer this dereferences it - confirm.
 	CShaderProgramPtr shader = m_SmoothTech->GetShader();
 
 	m_ShaderInitialized = m_SmoothTech && shader;
 
 	if (!m_ShaderInitialized)
 	{
 		LOGERROR("Failed to load SmoothLOS shader, disabling.");
 		g_RenderingOptions.SetSmoothLOS(false);
 		return false;
 	}
 
 	return true;
 }
 
 // Releases the main LOS texture and both smoothing textures. The smoothing
 // framebuffers are not touched here.
 void CLOSTexture::DeleteTexture()
 {
 	m_Texture.reset();
 	m_SmoothTextures[0].reset();
 	m_SmoothTextures[1].reset();
 }
 
 // Marks the texture as dirty; InterpolateLOS recomputes it when the flag is
 // set.
 void CLOSTexture::MakeDirty()
 {
 	m_Dirty = true;
 }
 
 // Returns the plain LOS texture (via GetTexture) when the renderer is
 // initialised and smooth LOS is disabled. Otherwise returns the smoothing
 // texture selected by m_WhichTexture.
 Renderer::Backend::GL::CTexture* CLOSTexture::GetTextureSmooth()
 {
 	if (CRenderer::IsInitialised() && !g_RenderingOptions.GetSmoothLOS())
 		return GetTexture();
 	else
 		return m_SmoothTextures[m_WhichTexture].get();
 }
 
 // Brings the LOS textures up to date for the current frame. The shader is
 // created lazily if smoothing is wanted, the LOS texture is recomputed when
 // dirty and, unless smoothing is skipped, a full-screen quad is rendered into
 // the current smoothing framebuffer with the los_interp technique. Afterwards
 // the backbuffer is bound again and m_WhichTexture is toggled.
 void CLOSTexture::InterpolateLOS(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	// Smoothing is skipped only when the renderer is initialised and the option
 	// is turned off.
 	const bool skipSmoothLOS = CRenderer::IsInitialised() && !g_RenderingOptions.GetSmoothLOS();
 	if (!skipSmoothLOS && !m_ShaderInitialized)
 	{
 		if (!CreateShader())
 			return;
 
 		// RecomputeTexture will not cause the ConstructTexture to run.
 		// Force the textures to be created.
 		DeleteTexture();
 		ConstructTexture(deviceCommandContext);
 		m_Dirty = true;
 	}
 
 	if (m_Dirty)
 	{
 		RecomputeTexture(deviceCommandContext);
 		m_Dirty = false;
 	}
 
 	if (skipSmoothLOS)
 		return;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render LOS texture");
 	deviceCommandContext->SetFramebuffer(m_SmoothFramebuffers[m_WhichTexture].get());
 
 	m_SmoothTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		m_SmoothTech->GetGraphicsPipelineStateDesc());
 
 	const CShaderProgramPtr& shader = m_SmoothTech->GetShader();
 
 	shader->BindTexture(str_losTex1, m_Texture.get());
 	// NOTE(review): ConstructTexture creates m_SmoothFramebuffers[i] from
 	// m_SmoothTextures[i], so this samples the texture that is being rendered
 	// to - confirm that this is intended.
 	shader->BindTexture(str_losTex2, m_SmoothTextures[m_WhichTexture].get());
 
 	// Only the first component carries a value: four times the frame delta.
 	shader->Uniform(str_delta, (float)g_Renderer.GetTimeManager().GetFrameDelta() * 4.0f, 0.0f, 0.0f, 0.0f);
 
 	// Temporarily switch the viewport to the size of the LOS texture; the old
 	// viewport is restored after the draw.
 	const SViewPort oldVp = g_Renderer.GetViewport();
 	const SViewPort vp =
 	{
 		0, 0,
 		static_cast<int>(m_Texture->GetWidth()),
 		static_cast<int>(m_Texture->GetHeight())
 	};
 	g_Renderer.SetViewport(vp);
 
 	// Two triangles covering [-1, 1] x [-1, 1].
 	float quadVerts[] =
 	{
 		1.0f, 1.0f,
 		-1.0f, 1.0f,
 		-1.0f, -1.0f,
 
 		-1.0f, -1.0f,
 		1.0f, -1.0f,
 		1.0f, 1.0f
 	};
 	// Matching texture coordinates covering [0, 1] x [0, 1].
 	float quadTex[] =
 	{
 		1.0f, 1.0f,
 		0.0f, 1.0f,
 		0.0f, 0.0f,
 
 		0.0f, 0.0f,
 		1.0f, 0.0f,
 		1.0f, 1.0f
 	};
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadTex);
 	shader->VertexPointer(2, GL_FLOAT, 0, quadVerts);
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 
 	g_Renderer.SetViewport(oldVp);
 
 	m_SmoothTech->EndPass();
 
 	deviceCommandContext->SetFramebuffer(
 		deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 
 	// Switch to the other smoothing texture/framebuffer for the next call.
 	m_WhichTexture = 1u - m_WhichTexture;
 }
 
 
 // Returns the plain (non-smoothed) LOS texture. Must not be called while the
 // texture is marked dirty.
 Renderer::Backend::GL::CTexture* CLOSTexture::GetTexture()
 {
 	ENSURE(!m_Dirty);
 	return m_Texture.get();
 }
 
 // Returns the texture matrix that ConstructTexture fills in. Must not be
 // called while the texture is marked dirty.
 const CMatrix3D& CLOSTexture::GetTextureMatrix()
 {
 	ENSURE(!m_Dirty);
 	return m_TextureMatrix;
 }
 
 // Returns the minimap texture matrix that ConstructTexture fills in. Must not
 // be called while the texture is marked dirty.
 const CMatrix3D& CLOSTexture::GetMinimapTextureMatrix()
 {
 	ENSURE(!m_Dirty);
 	return m_MinimapTextureMatrix;
 }
 
 // Creates the LOS texture (and, when smooth LOS is enabled, the two smoothing
 // textures with their framebuffers), fills them with zeros and computes the
 // texture matrices. Does nothing when the range manager component is not
 // available.
 void CLOSTexture::ConstructTexture(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	CmpPtr<ICmpRangeManager> cmpRangeManager(m_Simulation, SYSTEM_ENTITY);
 	if (!cmpRangeManager)
 		return;
 
 	m_MapSize = cmpRangeManager->GetVerticesPerSide();
 
 	// Map size plus blur padding, rounded up to the upload alignment and then
 	// to a power of two.
 	const size_t textureSize = round_up_to_pow2(round_up((size_t)m_MapSize + g_BlurSize - 1, g_SubTextureAlignment));
 
 	Renderer::Backend::GL::CDevice* backendDevice = deviceCommandContext->GetDevice();
 
 	const Renderer::Backend::Sampler::Desc defaultSamplerDesc =
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE);
 
 	m_Texture = backendDevice->CreateTexture2D("LOSTexture",
 		Renderer::Backend::Format::A8, textureSize, textureSize, defaultSamplerDesc);
 
 	// Initialise texture with SoD color, for the areas we don't
 	// overwrite with uploading later.
 	std::unique_ptr<u8[]> texData = std::make_unique<u8[]>(textureSize * textureSize);
 	memset(texData.get(), 0x00, textureSize * textureSize);
 
 	if (CRenderer::IsInitialised() && g_RenderingOptions.GetSmoothLOS())
 	{
 		m_SmoothTextures[0] = backendDevice->CreateTexture2D("LOSSmoothTexture0",
 			Renderer::Backend::Format::A8, textureSize, textureSize, defaultSamplerDesc);
 		m_SmoothTextures[1] = backendDevice->CreateTexture2D("LOSSmoothTexture1",
 			Renderer::Backend::Format::A8, textureSize, textureSize, defaultSamplerDesc);
 
 		// Each framebuffer renders into the smoothing texture with the same
 		// index.
 		m_SmoothFramebuffers[0] = backendDevice->CreateFramebuffer("LOSSmoothFramebuffer0",
 			m_SmoothTextures[0].get(), nullptr);
 		m_SmoothFramebuffers[1] = backendDevice->CreateFramebuffer("LOSSmoothFramebuffer1",
 			m_SmoothTextures[1].get(), nullptr);
 		// NOTE(review): on failure smooth LOS is only switched off in the
 		// options; the uploads below still run - confirm that this is fine.
 		if (!m_SmoothFramebuffers[0] || !m_SmoothFramebuffers[1])
 		{
 			LOGERROR("Failed to create LOS framebuffers");
 			g_RenderingOptions.SetSmoothLOS(false);
 		}
 
 		deviceCommandContext->UploadTexture(
 			m_SmoothTextures[0].get(), Renderer::Backend::Format::A8,
 			texData.get(), textureSize * textureSize);
 		deviceCommandContext->UploadTexture(
 			m_SmoothTextures[1].get(), Renderer::Backend::Format::A8,
 			texData.get(), textureSize * textureSize);
 	}
 
 	deviceCommandContext->UploadTexture(
 		m_Texture.get(), Renderer::Backend::Format::A8,
 		texData.get(), textureSize * textureSize);
 
 	texData.reset();
 
 	{
 		// Texture matrix: We want to map
 		//   world pos (0, y, 0)  (i.e. first vertex)
 		//     onto texcoord (0.5/texsize, 0.5/texsize)  (i.e. middle of first texel);
 		//   world pos ((mapsize-1)*cellsize, y, (mapsize-1)*cellsize)  (i.e. last vertex)
 		//     onto texcoord ((mapsize-0.5) / texsize, (mapsize-0.5) / texsize)  (i.e. middle of last texel)
 
 		float s = (m_MapSize-1) / static_cast<float>(textureSize * (m_MapSize-1) * LOS_TILE_SIZE);
 		float t = 0.5f / textureSize;
 		m_TextureMatrix.SetZero();
 		m_TextureMatrix._11 = s;
 		m_TextureMatrix._23 = s;
 		m_TextureMatrix._14 = t;
 		m_TextureMatrix._24 = t;
 		m_TextureMatrix._44 = 1;
 	}
 
 	{
 		// Minimap matrix: We want to map UV (0,0)-(1,1) onto (0,0)-(mapsize/texsize, mapsize/texsize)
 
 		float s = m_MapSize / (float)textureSize;
 		m_MinimapTextureMatrix.SetZero();
 		m_MinimapTextureMatrix._11 = s;
 		m_MinimapTextureMatrix._22 = s;
 		m_MinimapTextureMatrix._44 = 1;
 	}
 }
 
 // Regenerates the blurred LOS bitmap for the currently displayed player and
 // uploads it into the LOS texture. The textures are recreated first when they
 // are missing or when the map size changed. Right after a recreation with
 // smooth LOS enabled, the same bitmap is also uploaded into both smoothing
 // textures. Returns without uploading when the range manager is unavailable.
 void CLOSTexture::RecomputeTexture(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	// If the map was resized, delete and regenerate the texture
 	if (m_Texture)
 	{
 		CmpPtr<ICmpRangeManager> cmpRangeManager(m_Simulation, SYSTEM_ENTITY);
 		if (!cmpRangeManager || m_MapSize != cmpRangeManager->GetVerticesPerSide())
 			DeleteTexture();
 	}
 
 	bool recreated = false;
 	if (!m_Texture)
 	{
 		ConstructTexture(deviceCommandContext);
 		recreated = true;
 	}
 
 	PROFILE("recompute LOS texture");
 
 	// The bitmap buffer includes the blur padding, see GetBitmapSize.
 	size_t pitch;
 	const size_t dataSize = GetBitmapSize(m_MapSize, m_MapSize, &pitch);
 	ENSURE(pitch * m_MapSize <= dataSize);
 	std::unique_ptr<u8[]> losData = std::make_unique<u8[]>(dataSize);
 
 	CmpPtr<ICmpRangeManager> cmpRangeManager(m_Simulation, SYSTEM_ENTITY);
 	if (!cmpRangeManager)
 		return;
 
 	CLosQuerier los(cmpRangeManager->GetLosQuerier(g_Game->GetSimulation2()->GetSimContext().GetCurrentDisplayedPlayer()));
 
 	GenerateBitmap(los, &losData[0], m_MapSize, m_MapSize, pitch);
 
 	// Only the first m_MapSize rows of pitch bytes each are uploaded.
 	if (CRenderer::IsInitialised() && g_RenderingOptions.GetSmoothLOS() && recreated)
 	{
 		deviceCommandContext->UploadTextureRegion(
 			m_SmoothTextures[0].get(), Renderer::Backend::Format::A8, losData.get(),
 			pitch * m_MapSize, 0, 0, pitch, m_MapSize);
 		deviceCommandContext->UploadTextureRegion(
 			m_SmoothTextures[1].get(), Renderer::Backend::Format::A8, losData.get(),
 			pitch * m_MapSize, 0, 0, pitch, m_MapSize);
 	}
 
 	deviceCommandContext->UploadTextureRegion(
 		m_Texture.get(), Renderer::Backend::Format::A8, losData.get(),
 		pitch * m_MapSize, 0, 0, pitch, m_MapSize);
 }
 
 // Computes the buffer size in bytes needed by GenerateBitmap for a w x h
 // bitmap. The row pitch, written to *pitch, is w plus g_BlurSize - 1 bytes of
 // padding rounded up to g_SubTextureAlignment. The returned size is that
 // pitch times h plus g_BlurSize - 1 padding rows.
 size_t CLOSTexture::GetBitmapSize(size_t w, size_t h, size_t* pitch)
 {
 	*pitch = round_up(w + g_BlurSize - 1, g_SubTextureAlignment);
 	return *pitch * (h + g_BlurSize - 1);
 }
 
 // Fills losData with the visibility bitmap of size w x h (255 = visible,
 // 127 = explored, 0 = otherwise), surrounded by zero padding, and then blurs
 // it in place with a separable 7-tap filter. losData must hold at least
 // pitch * (h + g_BlurSize - 1) bytes, which is the size GetBitmapSize
 // returns. After both blur passes the result occupies the top-left w x h
 // area of the buffer.
 void CLOSTexture::GenerateBitmap(const CLosQuerier& los, u8* losData, size_t w, size_t h, size_t pitch)
 {
 	u8 *dataPtr = losData;
 
 	// Initialise the top padding
 	for (size_t j = 0; j < g_BlurSize/2; ++j)
 		for (size_t i = 0; i < pitch; ++i)
 			*dataPtr++ = 0;
 
 	for (size_t j = 0; j < h; ++j)
 	{
 		// Initialise the left padding
 		for (size_t i = 0; i < g_BlurSize/2; ++i)
 			*dataPtr++ = 0;
 
 		// Fill in the visibility data
 		for (size_t i = 0; i < w; ++i)
 		{
 			if (los.IsVisible_UncheckedRange(i, j))
 				*dataPtr++ = 255;
 			else if (los.IsExplored_UncheckedRange(i, j))
 				*dataPtr++ = 127;
 			else
 				*dataPtr++ = 0;
 		}
 
 		// Initialise the right padding
 		for (size_t i = 0; i < pitch - w - g_BlurSize/2; ++i)
 			*dataPtr++ = 0;
 	}
 
 	// Initialise the bottom padding
 	for (size_t j = 0; j < g_BlurSize/2; ++j)
 		for (size_t i = 0; i < pitch; ++i)
 			*dataPtr++ = 0;
 
 	// Horizontal blur:
 	// Each output byte is written at the leftmost of the seven source bytes,
 	// so the data rows move left by the width of the left padding. The weights
 	// sum to 64.
 
 	for (size_t j = g_BlurSize/2; j < h + g_BlurSize/2; ++j)
 	{
 		for (size_t i = 0; i < w; ++i)
 		{
 			u8* d = &losData[i+j*pitch];
 			*d = (
 				1*d[0] +
 				6*d[1] +
 				15*d[2] +
 				20*d[3] +
 				15*d[4] +
 				6*d[5] +
 				1*d[6]
 			) / 64;
 		}
 	}
 
 	// Vertical blur:
 	// Same filter along the rows; each output byte is written at the topmost of
 	// the seven source rows, so the image moves up by the height of the top
 	// padding.
 
 	for (size_t j = 0; j < h; ++j)
 	{
 		for (size_t i = 0; i < w; ++i)
 		{
 			u8* d = &losData[i+j*pitch];
 			*d = (
 				1*d[0*pitch] +
 				6*d[1*pitch] +
 				15*d[2*pitch] +
 				20*d[3*pitch] +
 				15*d[4*pitch] +
 				6*d[5*pitch] +
 				1*d[6*pitch]
 			) / 64;
 		}
 	}
 }
Index: ps/trunk/source/graphics/MiniMapTexture.cpp
===================================================================
--- ps/trunk/source/graphics/MiniMapTexture.cpp	(revision 26524)
+++ ps/trunk/source/graphics/MiniMapTexture.cpp	(revision 26525)
@@ -1,563 +1,566 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "MiniMapTexture.h"
 
 #include "graphics/GameView.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/MiniPatch.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/ShaderProgramPtr.h"
 #include "graphics/Terrain.h"
 #include "graphics/TerrainTextureEntry.h"
 #include "graphics/TerrainTextureManager.h"
 #include "graphics/TerritoryTexture.h"
 #include "graphics/TextureManager.h"
 #include "lib/bits.h"
 #include "lib/timer.h"
 #include "maths/Vector2D.h"
 #include "ps/ConfigDB.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Filesystem.h"
 #include "ps/Game.h"
 #include "ps/World.h"
 #include "ps/XML/Xeromyces.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/WaterManager.h"
 #include "scriptinterface/Object.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/components/ICmpMinimap.h"
 #include "simulation2/components/ICmpRangeManager.h"
 #include "simulation2/system/ParamNode.h"
 
 namespace
 {
 
 // Set max drawn entities to 64K / 4 for now, which is more than enough.
 // 4 is the number of vertices per entity.
 // TODO: we should be cleverer about drawing them to reduce clutter,
 // f.e. use instancing.
 const size_t MAX_ENTITIES_DRAWN = 65536 / 4;
 
 // Width and height of the final minimap texture, also used as the viewport
 // size while rendering into it.
 const size_t FINAL_TEXTURE_SIZE = 512;
 
 // Multiplies the three low bytes of color by x and returns them with the top
 // byte forced to 0xff. The channel order is swapped: the value from bits 0-7
 // ends up in bits 16-23 and vice versa, bits 8-15 stay in place.
 // NOTE(review): the scaled values are not clamped, so x > 1 can overflow into
 // neighbouring channels - confirm that callers never pass x > 1.
 unsigned int ScaleColor(unsigned int color, float x)
 {
 	unsigned int r = unsigned(float(color & 0xff) * x);
 	unsigned int g = unsigned(float((color >> 8) & 0xff) * x);
 	unsigned int b = unsigned(float((color >> 16) & 0xff) * x);
 	return (0xff000000 | b | g << 8 | r << 16);
 }
 
-void DrawTexture(const CShaderProgramPtr& shader)
+void DrawTexture(
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
+	const CShaderProgramPtr& shader)
 {
 	// Draws a quad covering [-1, 1] x [-1, 1] at z = 0 with texture coordinates
 	// covering [0, 1] x [0, 1], as two triangles, using the given shader.
 	// Textures and uniforms are expected to be set up by the caller.
 	const float quadUVs[] =
 	{
 		0.0f, 0.0f,
 		1.0f, 0.0f,
 		1.0f, 1.0f,
 
 		1.0f, 1.0f,
 		0.0f, 1.0f,
 		0.0f, 0.0f
 	};
 	const float quadVertices[] =
 	{
 		-1.0f, -1.0f, 0.0f,
 		1.0f, -1.0f, 0.0f,
 		1.0f, 1.0f, 0.0f,
 
 		1.0f, 1.0f, 0.0f,
 		-1.0f, 1.0f, 0.0f,
 		-1.0f, -1.0f, 0.0f
 	};
 
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadUVs);
 	shader->VertexPointer(3, GL_FLOAT, 0, quadVertices);
 	shader->AssertPointersBound();
 
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 }
 
 // Color (four 8-bit channels) and 2D position of one entity marker, as
 // consumed by AddEntity.
 struct MinimapUnitVertex
 {
 	// This struct is copyable for convenience and because to move is to copy for primitives.
 	u8 r, g, b, a;
 	CVector2D position;
 };
 
 // Writes the four vertices of one entity marker through the given attribute
 // iterators. The positions are v.position offset by entityRadius to the left,
 // bottom, right and top (in that order), and all four vertices get the color
 // of v. Both iterators are advanced by four elements.
 inline void AddEntity(const MinimapUnitVertex& v,
 	VertexArrayIterator<u8[4]>& attrColor,
 	VertexArrayIterator<float[2]>& attrPos,
 	const float entityRadius)
 {
 	const CVector2D offsets[4] =
 	{
 		{-entityRadius, 0.0f},
 		{0.0f, -entityRadius},
 		{entityRadius, 0.0f},
 		{0.0f, entityRadius}
 	};
 
 	for (const CVector2D& offset : offsets)
 	{
 		(*attrColor)[0] = v.r;
 		(*attrColor)[1] = v.g;
 		(*attrColor)[2] = v.b;
 		(*attrColor)[3] = v.a;
 		++attrColor;
 
 		(*attrPos)[0] = v.position.X + offset.X;
 		(*attrPos)[1] = v.position.Y + offset.Y;
 		++attrPos;
 	}
 }
 
 } // anonymous namespace
 
 // Sets up everything that does not depend on the map: registers the
 // pathfinder XML validator, reads the shallow passage height and the blink
 // and ping durations, and lays out and uploads the vertex and index arrays
 // used for entity markers with placeholder content.
 CMiniMapTexture::CMiniMapTexture(CSimulation2& simulation)
 	: m_Simulation(simulation), m_IndexArray(false),
 	m_VertexArray(Renderer::Backend::GL::CBuffer::Type::VERTEX, true)
 {
 	// Register Relax NG validator.
 	CXeromyces::AddValidator(g_VFS, "pathfinder", "simulation/data/pathfinder.rng");
 
 	m_ShallowPassageHeight = GetShallowPassageHeight();
 
 	double blinkDuration = 1.0;
 	// Tests won't have config initialised
 	if (CConfigDB::IsInitialised())
 	{
 		CFG_GET_VAL("gui.session.minimap.blinkduration", blinkDuration);
 		CFG_GET_VAL("gui.session.minimap.pingduration", m_PingDuration);
 	}
 	m_HalfBlinkDuration = blinkDuration / 2.0;
 
 	// Per-vertex layout: two floats for the position and four unsigned bytes
 	// for the color.
 	m_AttributePos.type = GL_FLOAT;
 	m_AttributePos.elems = 2;
 	m_VertexArray.AddAttribute(&m_AttributePos);
 
 	m_AttributeColor.type = GL_UNSIGNED_BYTE;
 	m_AttributeColor.elems = 4;
 	m_VertexArray.AddAttribute(&m_AttributeColor);
 
 	m_VertexArray.SetNumberOfVertices(MAX_ENTITIES_DRAWN * 4);
 	m_VertexArray.Layout();
 
 	// Six indices per entity; all of them start out as zero.
 	m_IndexArray.SetNumberOfVertices(MAX_ENTITIES_DRAWN * 6);
 	m_IndexArray.Layout();
 	VertexArrayIterator<u16> index = m_IndexArray.GetIterator();
 	for (size_t i = 0; i < m_IndexArray.GetNumberOfVertices(); ++i)
 		*index++ = 0;
 	m_IndexArray.Upload();
 
 	// Every vertex starts with a zero color and the position
 	// (-10000, -10000).
 	VertexArrayIterator<float[2]> attrPos = m_AttributePos.GetIterator<float[2]>();
 	VertexArrayIterator<u8[4]> attrColor = m_AttributeColor.GetIterator<u8[4]>();
 	for (size_t i = 0; i < m_VertexArray.GetNumberOfVertices(); ++i)
 	{
 		(*attrColor)[0] = 0;
 		(*attrColor)[1] = 0;
 		(*attrColor)[2] = 0;
 		(*attrColor)[3] = 0;
 		++attrColor;
 
 		(*attrPos)[0] = -10000.0f;
 		(*attrPos)[1] = -10000.0f;
 
 		++attrPos;
 	}
 	m_VertexArray.Upload();
 }
 
 // Releases the textures and terrain data via DestroyTextures.
 CMiniMapTexture::~CMiniMapTexture()
 {
 	DestroyTextures();
 }
 
 // Marks both the terrain texture and the final texture as dirty when the
 // water height of the water manager differs from the cached m_WaterHeight.
 // The time argument is unused.
 void CMiniMapTexture::Update(const float UNUSED(deltaRealTime))
 {
 	if (m_WaterHeight != g_Renderer.GetSceneRenderer().GetWaterManager().m_WaterHeight)
 	{
 		m_TerrainTextureDirty = true;
 		m_FinalTextureDirty = true;
 	}
 }
 
 // Entry point for rendering the minimap texture: creates the textures on
 // first use, rebuilds the terrain texture when it is dirty and then renders
 // the final texture. Does nothing when the world has no terrain.
 void CMiniMapTexture::Render(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	const CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	if (!terrain)
 		return;
 
 	if (!m_TerrainTexture)
 		CreateTextures(deviceCommandContext, terrain);
 
 	if (m_TerrainTextureDirty)
 		RebuildTerrainTexture(deviceCommandContext, terrain);
 
 	RenderFinalTexture(deviceCommandContext);
 }
 
 // Recreates all minimap textures for the given terrain: the terrain texture
 // (power-of-two sized and initially filled with 0xFF000000), the CPU-side
 // terrain data buffer, and the final texture with its framebuffer.
 void CMiniMapTexture::CreateTextures(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, const CTerrain* terrain)
 {
 	DestroyTextures();
 
 	m_MapSize = terrain->GetVerticesPerSide();
 	const size_t textureSize = round_up_to_pow2(static_cast<size_t>(m_MapSize));
 
 	const Renderer::Backend::Sampler::Desc defaultSamplerDesc =
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE);
 
 	Renderer::Backend::GL::CDevice* backendDevice = deviceCommandContext->GetDevice();
 
 	// Create terrain texture
 	m_TerrainTexture = backendDevice->CreateTexture2D("MiniMapTerrainTexture",
 		Renderer::Backend::Format::R8G8B8A8, textureSize, textureSize, defaultSamplerDesc);
 
 	// Initialise texture with solid black, for the areas we don't
 	// overwrite with uploading later.
 	std::unique_ptr<u32[]> texData = std::make_unique<u32[]>(textureSize * textureSize);
 	for (size_t i = 0; i < textureSize * textureSize; ++i)
 		texData[i] = 0xFF000000;
 	deviceCommandContext->UploadTexture(
 		m_TerrainTexture.get(), Renderer::Backend::Format::R8G8B8A8,
 		texData.get(), textureSize * textureSize * 4);
 	texData.reset();
 
 	// One 32-bit color per terrain tile; there are m_MapSize - 1 tiles per
 	// side.
 	m_TerrainData = std::make_unique<u32[]>((m_MapSize - 1) * (m_MapSize - 1));
 
 	m_FinalTexture = backendDevice->CreateTexture2D("MiniMapFinalTexture",
 		Renderer::Backend::Format::R8G8B8A8, FINAL_TEXTURE_SIZE, FINAL_TEXTURE_SIZE, defaultSamplerDesc);
 
 	m_FinalTextureFramebuffer = backendDevice->CreateFramebuffer("MiniMapFinalFramebuffer",
 		m_FinalTexture.get(), nullptr);
 	ENSURE(m_FinalTextureFramebuffer);
 }
 
 // Releases the terrain texture, the final texture and the terrain data.
 // NOTE(review): m_FinalTextureFramebuffer, which CreateTextures creates from
 // m_FinalTexture, is not reset here - confirm that this is intended.
 void CMiniMapTexture::DestroyTextures()
 {
 	m_TerrainTexture.reset();
 	m_FinalTexture.reset();
 	m_TerrainData.reset();
 }
 
 // Recomputes one color per terrain tile into m_TerrainData and uploads the
 // result into the terrain texture. Tiles below the water height get one of
 // two constant colors (shallow or deep water). Other tiles use the base color
 // of the tile's terrain texture (white if there is none), scaled by a factor
 // derived from the height map. The dirty flag is cleared, but set again if
 // any terrain texture could not be loaded yet.
 void CMiniMapTexture::RebuildTerrainTexture(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CTerrain* terrain)
 {
 	const u32 x = 0;
 	const u32 y = 0;
 	const u32 width = m_MapSize - 1;
 	const u32 height = m_MapSize - 1;
 
 	m_WaterHeight = g_Renderer.GetSceneRenderer().GetWaterManager().m_WaterHeight;
 	m_TerrainTextureDirty = false;
 
 	for (u32 j = 0; j < height; ++j)
 	{
 		u32* dataPtr = m_TerrainData.get() + ((y + j) * width) + x;
 		for (u32 i = 0; i < width; ++i)
 		{
 			// Average ground level of the four corner vertices of the tile.
 			const float avgHeight = ( terrain->GetVertexGroundLevel((int)i, (int)j)
 					+ terrain->GetVertexGroundLevel((int)i+1, (int)j)
 					+ terrain->GetVertexGroundLevel((int)i, (int)j+1)
 					+ terrain->GetVertexGroundLevel((int)i+1, (int)j+1)
 				) / 4.0f;
 
 			if (avgHeight < m_WaterHeight && avgHeight > m_WaterHeight - m_ShallowPassageHeight)
 			{
 				// shallow water
 				*dataPtr++ = 0xffc09870;
 			}
 			else if (avgHeight < m_WaterHeight)
 			{
 				// Set water as constant color for consistency on different maps
 				*dataPtr++ = 0xffa07850;
 			}
 			else
 			{
 				// Brightness derived from the high byte of the 16-bit height
 				// map value, in the range 170..255.
 				int hmap = ((int)terrain->GetHeightMap()[(y + j) * m_MapSize + x + i]) >> 8;
 				int val = (hmap / 3) + 170;
 
 				u32 color = 0xFFFFFFFF;
 
 				CMiniPatch* mp = terrain->GetTile(x + i, y + j);
 				if (mp)
 				{
 					CTerrainTextureEntry* tex = mp->GetTextureEntry();
 					if (tex)
 					{
 						// If the texture can't be loaded yet, set the dirty flags
 						// so we'll try regenerating the terrain texture again soon
 						if (!tex->GetTexture()->TryLoad())
 							m_TerrainTextureDirty = true;
 
 						color = tex->GetBaseColor();
 					}
 				}
 
 				*dataPtr++ = ScaleColor(color, float(val) / 255.0f);
 			}
 		}
 	}
 
 	// Upload the texture
 	deviceCommandContext->UploadTextureRegion(
 		m_TerrainTexture.get(), Renderer::Backend::Format::R8G8B8A8,
 		m_TerrainData.get(), width * height * 4, 0, 0, width, height);
 }
 
 void CMiniMapTexture::RenderFinalTexture(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	// only update 2x / second
 	// (note: since units only move a few pixels per second on the minimap,
 	// we can get away with infrequent updates; this is slow)
 	// TODO: Update all but camera at same speed as simulation
 	const double currentTime = timer_Time();
 	const bool doUpdate = (currentTime - m_LastFinalTextureUpdate > 0.5) || m_FinalTextureDirty;
 	if (doUpdate)
 		m_LastFinalTextureUpdate = currentTime;
 	else
 		return;
 	m_FinalTextureDirty = false;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render minimap texture");
 	deviceCommandContext->SetFramebuffer(m_FinalTextureFramebuffer.get());
 
 	const SViewPort oldViewPort = g_Renderer.GetViewport();
 	const SViewPort viewPort = { 0, 0, FINAL_TEXTURE_SIZE, FINAL_TEXTURE_SIZE };
 	g_Renderer.SetViewport(viewPort);
 
 	CmpPtr<ICmpRangeManager> cmpRangeManager(m_Simulation, SYSTEM_ENTITY);
 	ENSURE(cmpRangeManager);
 	CLOSTexture& losTexture = g_Game->GetView()->GetLOSTexture();
 
 	const float invTileMapSize = 1.0f / static_cast<float>(TERRAIN_TILE_SIZE * m_MapSize);
 	const float texCoordMax = m_TerrainTexture ? static_cast<float>(m_MapSize - 1) / m_TerrainTexture->GetWidth() : 1.0f;
 
 	CShaderProgramPtr shader;
 	CShaderTechniquePtr tech;
 
 	CShaderDefines baseDefines;
 	baseDefines.Add(str_MINIMAP_BASE, str_1);
 
 	tech = g_Renderer.GetShaderManager().LoadEffect(str_minimap, baseDefines);
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		tech->GetGraphicsPipelineStateDesc();
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 	shader = tech->GetShader();
 
 	if (m_TerrainTexture)
 		shader->BindTexture(str_baseTex, m_TerrainTexture.get());
 
 	CMatrix3D baseTransform;
 	baseTransform.SetIdentity();
 	CMatrix3D baseTextureTransform;
 	baseTextureTransform.SetIdentity();
 
 	CMatrix3D terrainTransform;
 	terrainTransform.SetIdentity();
 	terrainTransform.Scale(texCoordMax, texCoordMax, 1.0f);
 	shader->Uniform(str_transform, baseTransform);
 	shader->Uniform(str_textureTransform, terrainTransform);
 
 	if (m_TerrainTexture)
-		DrawTexture(shader);
+		DrawTexture(deviceCommandContext, shader);
 
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	pipelineStateDesc.blendState.colorWriteMask =
 		Renderer::Backend::ColorWriteMask::RED |
 		Renderer::Backend::ColorWriteMask::GREEN |
 		Renderer::Backend::ColorWriteMask::BLUE;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	// Draw territory boundaries
 	CTerritoryTexture& territoryTexture = g_Game->GetView()->GetTerritoryTexture();
 
 	shader->BindTexture(str_baseTex, territoryTexture.GetTexture());
 	shader->Uniform(str_transform, baseTransform);
 	shader->Uniform(str_textureTransform, territoryTexture.GetMinimapTextureMatrix());
 
-	DrawTexture(shader);
+	DrawTexture(deviceCommandContext, shader);
 
 	pipelineStateDesc.blendState.enabled = false;
 	pipelineStateDesc.blendState.colorWriteMask =
 		Renderer::Backend::ColorWriteMask::ALPHA;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	shader->BindTexture(str_baseTex, losTexture.GetTexture());
 	shader->Uniform(str_transform, baseTransform);
 	shader->Uniform(str_textureTransform, losTexture.GetMinimapTextureMatrix());
 
-	DrawTexture(shader);
+	DrawTexture(deviceCommandContext, shader);
 
 	tech->EndPass();
 
 	CShaderDefines pointDefines;
 	pointDefines.Add(str_MINIMAP_POINT, str_1);
 	tech = g_Renderer.GetShaderManager().LoadEffect(str_minimap, pointDefines);
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		tech->GetGraphicsPipelineStateDesc());
 	shader = tech->GetShader();
 	shader->Uniform(str_transform, baseTransform);
 
 	CMatrix3D unitMatrix;
 	unitMatrix.SetIdentity();
 	// Convert world space coordinates into [0, 2].
 	const float unitScale = invTileMapSize;
 	unitMatrix.Scale(unitScale * 2.0f, unitScale * 2.0f, 1.0f);
 	// Offset the coordinates to [-1, 1].
 	unitMatrix.Translate(CVector3D(-1.0f, -1.0f, 0.0f));
 	shader->Uniform(str_transform, unitMatrix);
 
 	CSimulation2::InterfaceList ents = m_Simulation.GetEntitiesWithInterface(IID_Minimap);
 
 	if (doUpdate)
 	{
 		VertexArrayIterator<float[2]> attrPos = m_AttributePos.GetIterator<float[2]>();
 		VertexArrayIterator<u8[4]> attrColor = m_AttributeColor.GetIterator<u8[4]>();
 
 		m_EntitiesDrawn = 0;
 		MinimapUnitVertex v;
 		std::vector<MinimapUnitVertex> pingingVertices;
 		pingingVertices.reserve(MAX_ENTITIES_DRAWN / 2);
 
 		// We might scale entities properly in the vertex shader but it requires
 		// additional space in the vertex buffer. So we assume that we don't need
 		// to change an entity size so often.
 		const float entityRadius = static_cast<float>(m_MapSize) / 128.0f * 6.0f;
 
 		if (currentTime > m_NextBlinkTime)
 		{
 			m_BlinkState = !m_BlinkState;
 			m_NextBlinkTime = currentTime + m_HalfBlinkDuration;
 		}
 
 		entity_pos_t posX, posZ;
 		for (CSimulation2::InterfaceList::const_iterator it = ents.begin(); it != ents.end(); ++it)
 		{
 			ICmpMinimap* cmpMinimap = static_cast<ICmpMinimap*>(it->second);
 			if (cmpMinimap->GetRenderData(v.r, v.g, v.b, posX, posZ))
 			{
 				LosVisibility vis = cmpRangeManager->GetLosVisibility(it->first, m_Simulation.GetSimContext().GetCurrentDisplayedPlayer());
 				if (vis != LosVisibility::HIDDEN)
 				{
 					v.a = 255;
 					v.position.X = posX.ToFloat();
 					v.position.Y = posZ.ToFloat();
 
 					// Check minimap pinging to indicate something
 					if (m_BlinkState && cmpMinimap->CheckPing(currentTime, m_PingDuration))
 					{
 						v.r = 255; // ping color is white
 						v.g = 255;
 						v.b = 255;
 						pingingVertices.push_back(v);
 					}
 					else
 					{
 						AddEntity(v, attrColor, attrPos, entityRadius);
 						++m_EntitiesDrawn;
 					}
 				}
 			}
 		}
 
 		// Add the pinged vertices at the end, so they are drawn on top
 		for (const MinimapUnitVertex& vertex : pingingVertices)
 		{
 			AddEntity(vertex, attrColor, attrPos, entityRadius);
 			++m_EntitiesDrawn;
 		}
 
 		ENSURE(m_EntitiesDrawn < MAX_ENTITIES_DRAWN);
 
 		VertexArrayIterator<u16> index = m_IndexArray.GetIterator();
 		for (size_t entityIndex = 0; entityIndex < m_EntitiesDrawn; ++entityIndex)
 		{
 			index[entityIndex * 6 + 0] = static_cast<u16>(entityIndex * 4 + 0);
 			index[entityIndex * 6 + 1] = static_cast<u16>(entityIndex * 4 + 1);
 			index[entityIndex * 6 + 2] = static_cast<u16>(entityIndex * 4 + 2);
 			index[entityIndex * 6 + 3] = static_cast<u16>(entityIndex * 4 + 0);
 			index[entityIndex * 6 + 4] = static_cast<u16>(entityIndex * 4 + 2);
 			index[entityIndex * 6 + 5] = static_cast<u16>(entityIndex * 4 + 3);
 		}
 
 		m_VertexArray.Upload();
 		m_IndexArray.Upload();
 	}
 
 	m_VertexArray.PrepareForRendering();
 
 	if (m_EntitiesDrawn > 0)
 	{
 		Renderer::Backend::GL::CDeviceCommandContext::Rect scissorRect;
 		scissorRect.x = scissorRect.y = 1;
 		scissorRect.width = scissorRect.height = FINAL_TEXTURE_SIZE - 2;
 		deviceCommandContext->SetScissors(1, &scissorRect);
 
-		u8* indexBase = m_IndexArray.Bind(deviceCommandContext);
+		m_IndexArray.UploadIfNeeded(deviceCommandContext);
 		u8* base = m_VertexArray.Bind(deviceCommandContext);
 		const GLsizei stride = (GLsizei)m_VertexArray.GetStride();
 
 		shader->VertexPointer(2, GL_FLOAT, stride, base + m_AttributePos.offset);
 		shader->ColorPointer(4, GL_UNSIGNED_BYTE, stride, base + m_AttributeColor.offset);
 		shader->AssertPointersBound();
 
-		glDrawElements(GL_TRIANGLES, m_EntitiesDrawn * 6, GL_UNSIGNED_SHORT, indexBase);
+		deviceCommandContext->SetIndexBuffer(m_IndexArray.GetBuffer());
+		deviceCommandContext->DrawIndexed(m_IndexArray.GetOffset(), m_EntitiesDrawn * 6, 0);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		CVertexBuffer::Unbind(deviceCommandContext);
 
 		deviceCommandContext->SetScissors(0, nullptr);
 	}
 
 	tech->EndPass();
 	deviceCommandContext->SetFramebuffer(
 		deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 	g_Renderer.SetViewport(oldViewPort);
 }
 
 // static
 float CMiniMapTexture::GetShallowPassageHeight()
 {
 	float shallowPassageHeight = 0.0f;
 	CParamNode externalParamNode;
 	CParamNode::LoadXML(externalParamNode, L"simulation/data/pathfinder.xml", "pathfinder");
 	const CParamNode pathingSettings = externalParamNode.GetChild("Pathfinder").GetChild("PassabilityClasses");
 	if (pathingSettings.GetChild("default").IsOk() && pathingSettings.GetChild("default").GetChild("MaxWaterDepth").IsOk())
 		shallowPassageHeight = pathingSettings.GetChild("default").GetChild("MaxWaterDepth").ToFloat();
 	return shallowPassageHeight;
 }
Index: ps/trunk/source/graphics/ParticleEmitter.cpp
===================================================================
--- ps/trunk/source/graphics/ParticleEmitter.cpp	(revision 26524)
+++ ps/trunk/source/graphics/ParticleEmitter.cpp	(revision 26525)
@@ -1,305 +1,305 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "ParticleEmitter.h"
 
 #include "graphics/LightEnv.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/ParticleEmitterType.h"
 #include "graphics/ParticleManager.h"
 #include "graphics/ShaderProgram.h"
 #include "graphics/TextureManager.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 
 // Creates an emitter of the given type: lays out the per-vertex attributes
 // (position, axis, UV, color; 4 vertices per particle) and fills and uploads
 // a static index array (6 indices per particle) sized for the type's
 // maximum particle count.
 CParticleEmitter::CParticleEmitter(const CParticleEmitterTypePtr& type) :
 	m_Type(type), m_Active(true), m_NextParticleIdx(0), m_EmissionRoundingError(0.f),
 	m_LastUpdateTime(type->m_Manager.GetCurrentTime()),
 	m_IndexArray(false),
 	m_VertexArray(Renderer::Backend::GL::CBuffer::Type::VERTEX, true),
 	m_LastFrameNumber(-1)
 {
 	// If we should start with particles fully emitted, pretend that we
 	// were created in the past so the first update will produce lots of
 	// particles.
 	// TODO: instead of this, maybe it would make more sense to do a full
 	// lifetime-length update of all emitters when the game first starts
 	// (so that e.g. buildings constructed later on won't have fully-started
 	// emitters, but those at the start will)?
 	if (m_Type->m_StartFull)
 		m_LastUpdateTime -= m_Type->m_MaxLifetime;
 
 	m_Particles.reserve(m_Type->m_MaxParticles);
 
 	m_AttributePos.type = GL_FLOAT;
 	m_AttributePos.elems = 3;
 	m_VertexArray.AddAttribute(&m_AttributePos);
 
 	m_AttributeAxis.type = GL_FLOAT;
 	m_AttributeAxis.elems = 2;
 	m_VertexArray.AddAttribute(&m_AttributeAxis);
 
 	m_AttributeUV.type = GL_FLOAT;
 	m_AttributeUV.elems = 2;
 	m_VertexArray.AddAttribute(&m_AttributeUV);
 
 	m_AttributeColor.type = GL_UNSIGNED_BYTE;
 	m_AttributeColor.elems = 4;
 	m_VertexArray.AddAttribute(&m_AttributeColor);
 
 	m_VertexArray.SetNumberOfVertices(m_Type->m_MaxParticles * 4);
 	m_VertexArray.Layout();
 
 	m_IndexArray.SetNumberOfVertices(m_Type->m_MaxParticles * 6);
 	m_IndexArray.Layout();
 	VertexArrayIterator<u16> index = m_IndexArray.GetIterator();
 	// Use a full-width counter: a u16 counter wraps before reaching a limit of
 	// 65536 or more, which would make this loop never terminate.
 	// NOTE(review): indices are still 16-bit, so more than 16384 particles
 	// (65536 / 4 vertices) cannot be addressed correctly - confirm the type
 	// loader enforces that limit.
 	const size_t maxParticles = m_Type->m_MaxParticles;
 	for (size_t i = 0; i < maxParticles; ++i)
 	{
 		// Two triangles per quad: (0, 1, 2) and (2, 3, 0).
 		const size_t firstVertex = i * 4;
 		*index++ = static_cast<u16>(firstVertex + 0);
 		*index++ = static_cast<u16>(firstVertex + 1);
 		*index++ = static_cast<u16>(firstVertex + 2);
 		*index++ = static_cast<u16>(firstVertex + 2);
 		*index++ = static_cast<u16>(firstVertex + 3);
 		*index++ = static_cast<u16>(firstVertex + 0);
 	}
 	// The indices never change afterwards, so upload once and release the
 	// backing store.
 	m_IndexArray.Upload();
 	m_IndexArray.FreeBackingStore();
 }
 
 void CParticleEmitter::UpdateArrayData(int frameNumber)
 {
 	if (m_LastFrameNumber == frameNumber)
 		return;
 
 	m_LastFrameNumber = frameNumber;
 
 	// Update m_Particles
 	m_Type->UpdateEmitter(*this, m_Type->m_Manager.GetCurrentTime() - m_LastUpdateTime);
 	m_LastUpdateTime = m_Type->m_Manager.GetCurrentTime();
 
 	// Regenerate the vertex array data:
 
 	VertexArrayIterator<CVector3D> attrPos = m_AttributePos.GetIterator<CVector3D>();
 	VertexArrayIterator<float[2]> attrAxis = m_AttributeAxis.GetIterator<float[2]>();
 	VertexArrayIterator<float[2]> attrUV = m_AttributeUV.GetIterator<float[2]>();
 	VertexArrayIterator<SColor4ub> attrColor = m_AttributeColor.GetIterator<SColor4ub>();
 
 	ENSURE(m_Particles.size() <= m_Type->m_MaxParticles);
 
 	CBoundingBoxAligned bounds;
 
 	for (size_t i = 0; i < m_Particles.size(); ++i)
 	{
 		// TODO: for more efficient rendering, maybe we should replace this with
 		// a degenerate quad if alpha is 0
 
 		bounds += m_Particles[i].pos;
 
 		*attrPos++ = m_Particles[i].pos;
 		*attrPos++ = m_Particles[i].pos;
 		*attrPos++ = m_Particles[i].pos;
 		*attrPos++ = m_Particles[i].pos;
 
 		// Compute corner offsets, split into sin/cos components so the vertex
 		// shader can multiply by the camera-right (or left?) and camera-up vectors
 		// to get rotating billboards:
 
 		float s = sin(m_Particles[i].angle) * m_Particles[i].size/2.f;
 		float c = cos(m_Particles[i].angle) * m_Particles[i].size/2.f;
 
 		(*attrAxis)[0] = c;
 		(*attrAxis)[1] = s;
 		++attrAxis;
 		(*attrAxis)[0] = s;
 		(*attrAxis)[1] = -c;
 		++attrAxis;
 		(*attrAxis)[0] = -c;
 		(*attrAxis)[1] = -s;
 		++attrAxis;
 		(*attrAxis)[0] = -s;
 		(*attrAxis)[1] = c;
 		++attrAxis;
 
 		(*attrUV)[0] = 1;
 		(*attrUV)[1] = 0;
 		++attrUV;
 		(*attrUV)[0] = 0;
 		(*attrUV)[1] = 0;
 		++attrUV;
 		(*attrUV)[0] = 0;
 		(*attrUV)[1] = 1;
 		++attrUV;
 		(*attrUV)[0] = 1;
 		(*attrUV)[1] = 1;
 		++attrUV;
 
 		SColor4ub color = m_Particles[i].color;
 
 		// Special case: If the blending depends on the source color, not the source alpha,
 		// then pre-multiply by the alpha. (This is kind of a hack.)
 		if (m_Type->m_BlendMode == CParticleEmitterType::BlendMode::OVERLAY ||
 			m_Type->m_BlendMode == CParticleEmitterType::BlendMode::MULTIPLY)
 		{
 			color.R = (color.R * color.A) / 255;
 			color.G = (color.G * color.A) / 255;
 			color.B = (color.B * color.A) / 255;
 		}
 
 		*attrColor++ = color;
 		*attrColor++ = color;
 		*attrColor++ = color;
 		*attrColor++ = color;
 	}
 
 	m_ParticleBounds = bounds;
 
 	m_VertexArray.Upload();
 }
 
 // Forwards to the vertex array's PrepareForRendering().
 void CParticleEmitter::PrepareForRendering()
 {
 	m_VertexArray.PrepareForRendering();
 }
 
 // Sets up the shader state shared by all draws of this emitter: LOS texture
 // and transform, sun/fog lighting uniforms and the particle base texture.
 void CParticleEmitter::Bind(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader)
 {
 	// Make sure the particle texture exists on the backend before it is bound below.
 	m_Type->m_Texture->UploadBackendTextureIfNeeded(deviceCommandContext);
 
 	CLOSTexture& losTexture = g_Renderer.GetSceneRenderer().GetScene().GetLOSTexture();
 	shader->BindTexture(str_losTex, losTexture.GetTextureSmooth());
 	// Only elements 0 and 12 of the LOS texture matrix are passed to the shader.
 	const float losMatrix0 = losTexture.GetTextureMatrix()[0];
 	const float losMatrix12 = losTexture.GetTextureMatrix()[12];
 	shader->Uniform(str_losTransform, losMatrix0, losMatrix12, 0.f, 0.f);
 
 	const CLightEnv& lighting = g_Renderer.GetSceneRenderer().GetLightEnv();
 	shader->Uniform(str_sunColor, lighting.m_SunColor);
 	shader->Uniform(str_fogColor, lighting.m_FogColor);
 	shader->Uniform(str_fogParams, lighting.m_FogFactor, lighting.m_FogMax, 0.f, 0.f);
 
 	shader->BindTexture(str_baseTex, m_Type->m_Texture->GetBackendTexture());
 }
 
 // Issues one indexed draw covering all current particles (6 indices each)
 // and updates the renderer's draw-call and particle statistics.
 void CParticleEmitter::RenderArray(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader)
 {
-	// Some drivers apparently don't like count=0 in glDrawArrays here,
-	// so skip all drawing in that case
 	// Nothing to draw (and nothing to count) without particles.
 	if (m_Particles.empty())
 		return;
 
-	u8* indexBase = m_IndexArray.Bind(deviceCommandContext);
+	m_IndexArray.UploadIfNeeded(deviceCommandContext);
 	u8* base = m_VertexArray.Bind(deviceCommandContext);
 
 	GLsizei stride = (GLsizei)m_VertexArray.GetStride();
 
 	// Each attribute pointer is the bound array's base plus that attribute's offset.
 	shader->VertexPointer(3, GL_FLOAT, stride, base + m_AttributePos.offset);
 
 	// Pass the sin/cos axis components as texcoords for no particular reason
 	// other than that they fit. (Maybe this should be glVertexAttrib* instead?)
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, stride, base + m_AttributeUV.offset);
 	shader->TexCoordPointer(GL_TEXTURE1, 2, GL_FLOAT, stride, base + m_AttributeAxis.offset);
 
 	shader->ColorPointer(4, GL_UNSIGNED_BYTE, stride, base + m_AttributeColor.offset);
 
 	shader->AssertPointersBound();
-	glDrawElements(GL_TRIANGLES, (GLsizei)(m_Particles.size() * 6), GL_UNSIGNED_SHORT, indexBase);
+
+	deviceCommandContext->SetIndexBuffer(m_IndexArray.GetBuffer());
+	deviceCommandContext->DrawIndexed(m_IndexArray.GetOffset(), m_Particles.size() * 6, 0);
 
 	g_Renderer.GetStats().m_DrawCalls++;
 	g_Renderer.GetStats().m_Particles += m_Particles.size();
 }
 
 // Marks this emitter inactive and registers it with the particle manager as
 // an unattached emitter. `self` is the shared pointer that is handed to the
 // manager (presumably the pointer owning this object - confirm with callers).
 void CParticleEmitter::Unattach(const CParticleEmitterPtr& self)
 {
 	m_Active = false;
 	m_Type->m_Manager.AddUnattachedEmitter(self);
 }
 
 // Stores a particle at the next slot: overwrites the existing particle there
 // if the slot is already populated, otherwise appends. The slot index then
 // advances, wrapping at the type's maximum particle count.
 void CParticleEmitter::AddParticle(const SParticle& particle)
 {
 	const bool slotExists = m_NextParticleIdx < m_Particles.size();
 	if (slotExists)
 		m_Particles[m_NextParticleIdx] = particle;
 	else
 		m_Particles.push_back(particle);
 
 	m_NextParticleIdx = (m_NextParticleIdx + 1) % m_Type->m_MaxParticles;
 }
 
 // Stores (or overwrites) the named value in m_EntityVariables.
 void CParticleEmitter::SetEntityVariable(const std::string& name, float value)
 {
 	m_EntityVariables[name] = value;
 }
 
 
 
 // Creates the model wrapper together with a fresh emitter of the given type.
 CModelParticleEmitter::CModelParticleEmitter(const CParticleEmitterTypePtr& type) :
 	m_Type(type)
 {
 	m_Emitter = CParticleEmitterPtr(new CParticleEmitter(m_Type));
 }
 
 // The emitter is not dropped with the model: it is marked inactive and
 // handed to the particle manager (see CParticleEmitter::Unattach).
 CModelParticleEmitter::~CModelParticleEmitter()
 {
 	m_Emitter->Unattach(m_Emitter);
 }
 
 // Forwards the named value to the wrapped emitter.
 void CModelParticleEmitter::SetEntityVariable(const std::string& name, float value)
 {
 	m_Emitter->SetEntityVariable(name, value);
 }
 
 // Returns a new heap-allocated model of the same emitter type. The clone
 // gets its own fresh emitter (created by the constructor); particle state
 // is not copied.
 CModelAbstract* CModelParticleEmitter::Clone() const
 {
 	return new CModelParticleEmitter(m_Type);
 }
 
 // Recomputes m_WorldBounds from the emitter's position and the bounds of
 // its current particles, as calculated by the emitter type.
 void CModelParticleEmitter::CalcBounds()
 {
 	// TODO: we ought to compute sensible bounds here, probably based on the
 	// current computed particle positions plus the emitter type's largest
 	// potential bounding box at the current position
 
 	m_WorldBounds = m_Type->CalculateBounds(m_Emitter->GetPosition(), m_Emitter->GetParticleBounds());
 }
 
 // Performs no position validation itself; only invalidates the bounds.
 void CModelParticleEmitter::ValidatePosition()
 {
 	// TODO: do we need to do anything here?
 
 	// This is a convenient (though possibly not particularly appropriate) place
 	// to invalidate bounds so they'll be recomputed from the recent particle data
 	InvalidateBounds();
 }
 
 // Intentionally empty: this model does nothing when its position is invalidated.
 void CModelParticleEmitter::InvalidatePosition()
 {
 }
 
 // Applies a new transform: updates the emitter's position and rotation from
 // it and then stores it via the base class. Does nothing if the transform is
 // unchanged.
 void CModelParticleEmitter::SetTransform(const CMatrix3D& transform)
 {
 	if (m_Transform == transform)
 		return;
 
 	m_Emitter->SetPosition(transform.GetTranslation());
 	m_Emitter->SetRotation(transform.GetRotation());
 
 	// call base class to set transform on this object
 	CRenderableObject::SetTransform(transform);
 }
Index: ps/trunk/source/graphics/ShaderProgram.h
===================================================================
--- ps/trunk/source/graphics/ShaderProgram.h	(revision 26524)
+++ ps/trunk/source/graphics/ShaderProgram.h	(revision 26525)
@@ -1,206 +1,206 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef INCLUDED_SHADERPROGRAM
 #define INCLUDED_SHADERPROGRAM
 
 #include "graphics/ShaderProgramPtr.h"
 #include "lib/ogl.h"
 #include "lib/file/vfs/vfs_path.h"
 #include "renderer/backend/gl/Texture.h"
 
 #include <map>
 #include <vector>
 
 struct CColor;
 class CMatrix3D;
 class CVector3D;
 class CShaderDefines;
 class CStrIntern;
 
 // Vertex data stream flags.
 // Each value occupies a distinct bit, so flags can be combined into a bitset
 // (see CShaderProgram::GetStreamFlags()).
 enum
 {
 	STREAM_POS = (1 << 0),
 	STREAM_NORMAL = (1 << 1),
 	STREAM_COLOR = (1 << 2),
 	STREAM_UV0 = (1 << 3),
 	STREAM_UV1 = (1 << 4),
 	STREAM_UV2 = (1 << 5),
 	STREAM_UV3 = (1 << 6),
 	// NOTE(review): the POSTOUV* names suggest "position used as UV set N" -
 	// confirm against the shader loading code.
 	STREAM_POSTOUV0 = (1 << 7),
 	STREAM_POSTOUV1 = (1 << 8),
 	STREAM_POSTOUV2 = (1 << 9),
 	STREAM_POSTOUV3 = (1 << 10)
 };
 
 /**
  * A compiled vertex+fragment shader program.
  * The implementation may use GL_ARB_{vertex,fragment}_program (ARB assembly syntax)
  * or GL_ARB_{vertex,fragment}_shader (GLSL), or may use hard-coded fixed-function
  * multitexturing setup code; the difference is hidden from the caller.
  *
  * Texture/uniform IDs are typically strings, corresponding to the names defined in
  * the shader .xml file. Alternatively (and more efficiently, if used very frequently),
  * call GetTextureBinding/GetUniformBinding and pass its return value as the ID.
  * Setting uniforms that the shader .xml doesn't support is harmless.
  *
  * For a high-level overview of shaders and materials, see
  * http://trac.wildfiregames.com/wiki/MaterialSystem
  */
 class CShaderProgram
 {
 	NONCOPYABLE(CShaderProgram);
 
 public:
 	// Identifier types for attributes, textures and uniforms; all are interned strings.
 	typedef CStrIntern attrib_id_t;
 	typedef CStrIntern texture_id_t;
 	typedef CStrIntern uniform_id_t;
 	// Value type of the fragmentIndexes map passed to ConstructARB().
 	typedef std::pair<int, GLenum> frag_index_pair_t;
 
 	/**
 	 * Construct based on ARB vertex/fragment program files.
 	 */
 	static CShaderProgram* ConstructARB(const VfsPath& vertexFile, const VfsPath& fragmentFile,
 		const CShaderDefines& defines,
 		const std::map<CStrIntern, int>& vertexIndexes, const std::map<CStrIntern, frag_index_pair_t>& fragmentIndexes,
 		int streamflags);
 
 	/**
 	 * Construct based on GLSL vertex/fragment shader files.
 	 */
 	static CShaderProgram* ConstructGLSL(const VfsPath& vertexFile, const VfsPath& fragmentFile,
 		const CShaderDefines& defines,
 		const std::map<CStrIntern, int>& vertexAttribs,
 		int streamflags);
 
 	/**
 	 * Represents a uniform attribute or texture binding.
 	 * For uniforms:
 	 *  - ARB shaders store vertex location in 'first', fragment location in 'second'.
 	 *  - GLSL shaders store uniform location in 'first', data type in 'second'.
 	 * For textures, all store texture target (e.g. GL_TEXTURE_2D) in 'first', texture unit in 'second'.
 	 * Non-existent bindings must store -1 in both.
 	 */
 	struct Binding
 	{
 		Binding(int a, int b) : first(a), second(b) { }
 
 		// Default-constructed bindings are non-existent (-1 in both fields).
 		Binding() : first(-1), second(-1) { }
 
 		/**
 		 * Returns whether this uniform attribute is active in the shader.
 		 * If not then there's no point calling Uniform() to set its value.
 		 */
 		bool Active() const { return first != -1 || second != -1; }
 
 		int first;
 		int second;
 	};
 
 	virtual ~CShaderProgram() { }
 
 	// Implemented by subclasses.
 	// NOTE(review): presumably recompiles the program from its source files - confirm.
 	virtual void Reload() = 0;
 
 	/**
 	 * Returns whether this shader was successfully loaded.
 	 */
 	bool IsValid() const;
 
 	/**
 	 * Binds the shader into the GL context. Call this before calling Uniform()
 	 * or trying to render with it.
 	 */
 	virtual void Bind() = 0;
 
 	/**
 	 * Unbinds the shader from the GL context. Call this after rendering with it.
 	 */
 	virtual void Unbind() = 0;
 
 	/**
 	 * Returns bitset of STREAM_* value, indicating what vertex data streams the
 	 * vertex shader needs (e.g. position, color, UV, ...).
 	 */
 	int GetStreamFlags() const;
 
 
 	// Looks up the binding for a texture ID; the result can be passed to the
 	// Binding overload of BindTexture().
 	virtual Binding GetTextureBinding(texture_id_t id) = 0;
 
 	// Variants of texture binding:
 	void BindTexture(texture_id_t id, const Renderer::Backend::GL::CTexture* tex);
 	void BindTexture(Binding id, const Renderer::Backend::GL::CTexture* tex);
 
 	// Looks up the binding for a uniform ID; the result can be passed to the
 	// Binding overloads of Uniform().
 	virtual Binding GetUniformBinding(uniform_id_t id) = 0;
 
 	// Uniform-setting methods that subclasses must define:
 	virtual void Uniform(Binding id, float v0, float v1, float v2, float v3) = 0;
 	virtual void Uniform(Binding id, const CMatrix3D& v) = 0;
 	virtual void Uniform(Binding id, size_t count, const CMatrix3D* v) = 0;
 	virtual void Uniform(Binding id, size_t count, const float* v) = 0;
 
 	// Convenient uniform-setting wrappers:
 
 	void Uniform(Binding id, int v);
 	void Uniform(Binding id, float v);
 	void Uniform(Binding id, float v0, float v1);
 	void Uniform(Binding id, const CVector3D& v);
 	void Uniform(Binding id, const CColor& v);
 
 	void Uniform(uniform_id_t id, int v);
 	void Uniform(uniform_id_t id, float v);
 	void Uniform(uniform_id_t id, float v0, float v1);
 	void Uniform(uniform_id_t id, const CVector3D& v);
 	void Uniform(uniform_id_t id, const CColor& v);
 	void Uniform(uniform_id_t id, float v0, float v1, float v2, float v3);
 	void Uniform(uniform_id_t id, const CMatrix3D& v);
 	void Uniform(uniform_id_t id, size_t count, const CMatrix3D* v);
 	void Uniform(uniform_id_t id, size_t count, const float* v);
 
 	// Vertex attribute pointers (equivalent to glVertexPointer etc):
 
 	virtual void VertexPointer(GLint size, GLenum type, GLsizei stride, const void* pointer);
 	virtual void NormalPointer(GLenum type, GLsizei stride, const void* pointer);
 	virtual void ColorPointer(GLint size, GLenum type, GLsizei stride, const void* pointer);
 	virtual void TexCoordPointer(GLenum texture, GLint size, GLenum type, GLsizei stride, const void* pointer);
 	virtual void VertexAttribPointer(attrib_id_t id, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void* pointer);
 	virtual void VertexAttribIPointer(attrib_id_t id, GLint size, GLenum type, GLsizei stride, const void* pointer);
 
 	/**
 	 * Checks that all the required vertex attributes have been set.
-	 * Call this before calling glDrawArrays/glDrawElements etc to avoid potential crashes.
+	 * Call this before calling Draw/DrawIndexed etc to avoid potential crashes.
 	 */
 	void AssertPointersBound();
 
 	// Implemented by subclasses; returns the files this program depends on.
 	virtual std::vector<VfsPath> GetFileDependencies() const = 0;
 
 protected:
 	// Only constructible through subclasses; streamflags is a bitset of STREAM_* values.
 	CShaderProgram(int streamflags);
 
 	// Low-level texture binding by GL texture handle, implemented by subclasses.
 	virtual void BindTexture(texture_id_t id, GLuint tex) = 0;
 	virtual void BindTexture(Binding id, GLuint tex) = 0;
 
 	// NOTE(review): presumably the values reported by IsValid() and
 	// GetStreamFlags() - their definitions are not in this header.
 	bool m_IsValid;
 	int m_StreamFlags;
 
 	// Non-GLSL client state handling:
 	void BindClientStates();
 	void UnbindClientStates();
 	int m_ValidStreams; // which streams have been specified via VertexPointer etc since the last Bind
 };
 
 #endif // INCLUDED_SHADERPROGRAM
Index: ps/trunk/source/graphics/TextRenderer.cpp
===================================================================
--- ps/trunk/source/graphics/TextRenderer.cpp	(revision 26524)
+++ ps/trunk/source/graphics/TextRenderer.cpp	(revision 26525)
@@ -1,332 +1,333 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "TextRenderer.h"
 
 #include "graphics/Font.h"
 #include "graphics/FontManager.h"
 #include "graphics/ShaderProgram.h"
 #include "graphics/TextureManager.h"
 #include "lib/ogl.h"
 #include "maths/Matrix3D.h"
 #include "ps/CStrIntern.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/Renderer.h"
 
 #include <errno.h>
 
 namespace
 {
 
 // Upper bound on the number of chars submitted in a single draw. Indices are
 // 16-bit (u16), so one draw can address at most 65536 vertices, and each char
 // currently uses 4 vertices.
 constexpr size_t MAX_CHAR_COUNT_PER_BATCH = 65536 / 4;
 
 } // anonymous namespace
 
 // Starts with a reset translation, opaque white as the current color and
 // the str_sans_10 font.
 CTextRenderer::CTextRenderer()
 {
 	ResetTranslate();
 	SetCurrentColor(CColor(1.0f, 1.0f, 1.0f, 1.0f));
 	SetCurrentFont(str_sans_10);
 }
 
 // Replaces the current translation. Marks the state dirty so the next text
 // starts a new batch.
 void CTextRenderer::ResetTranslate(const CVector2D& translate)
 {
 	m_Translate = translate;
 	m_Dirty = true;
 }
 
 // Adds (x, y) to the current translation. Marks the state dirty so the next
 // text starts a new batch.
 void CTextRenderer::Translate(float x, float y)
 {
 	m_Translate += CVector2D{x, y};
 	m_Dirty = true;
 }
 
 // Sets the rectangle that PutString() tests text against. A
 // default-constructed CRect disables that test. Does not start a new batch.
 void CTextRenderer::SetClippingRect(const CRect& rect)
 {
 	m_Clipping = rect;
 }
 
 // Changes the color used for subsequently added text. A real change marks
 // the state dirty so the next text starts a new batch.
 void CTextRenderer::SetCurrentColor(const CColor& color)
 {
 	// Same color as before: keep appending to the current batch.
 	if (!(m_Color != color))
 		return;
 
 	m_Color = color;
 	m_Dirty = true;
 }
 
 // Changes the font used for subsequently added text. A real change loads
 // the font through the font manager and marks the state dirty so the next
 // text starts a new batch.
 void CTextRenderer::SetCurrentFont(CStrIntern font)
 {
 	// Same font name as before: nothing to load.
 	if (!(font != m_FontName))
 		return;
 
 	m_FontName = font;
 	m_Font = g_Renderer.GetFontManager().LoadFont(font);
 	m_Dirty = true;
 }
 
 // Formats the arguments printf-style into a fixed 1024-wchar_t buffer and
 // adds the result via PutAdvance(). A formatting failure is logged and
 // whatever is in the buffer is still passed on.
 void CTextRenderer::PrintfAdvance(const wchar_t* fmt, ...)
 {
 	// Zero-initialised, and vswprintf is given one element less than the array
 	// size, so the final element always stays a terminator.
 	wchar_t buf[1024] = {0};
 
 	va_list args;
 	va_start(args, fmt);
 	int ret = vswprintf(buf, ARRAY_SIZE(buf)-1, fmt, args);
 	va_end(args);
 
 	// Name this function in the log (the message previously said "Printf").
 	if (ret < 0)
 		debug_printf("CTextRenderer::PrintfAdvance vswprintf failed (buffer size exceeded?) - return value %d, errno %d\n", ret, errno);
 
 	PutAdvance(buf);
 }
 
 
 // Formats the arguments printf-style into a fixed 1024-wchar_t buffer and
 // adds the result at (x, y) via Put(). A formatting failure is logged and
 // whatever is in the buffer is still passed on.
 void CTextRenderer::PrintfAt(float x, float y, const wchar_t* fmt, ...)
 {
 	wchar_t formatted[1024] = {0};
 
 	va_list argList;
 	va_start(argList, fmt);
 	const int result = vswprintf(formatted, ARRAY_SIZE(formatted)-1, fmt, argList);
 	va_end(argList);
 
 	if (result < 0)
 		debug_printf("CTextRenderer::PrintfAt vswprintf failed (buffer size exceeded?) - return value %d, errno %d\n", result, errno);
 
 	Put(x, y, formatted);
 }
 
 // Adds the string at the current translation, then moves the translation
 // right by the string's width so the next text continues after it.
 void CTextRenderer::PutAdvance(const wchar_t* buf)
 {
 	Put(0.0f, 0.0f, buf);
 
 	// PutString() treats a missing font as "can't render" and drops the text;
 	// without a font there is no width to advance by either, and
 	// dereferencing m_Font would crash.
 	if (!m_Font)
 		return;
 
 	int w = 0;
 	int h = 0;
 	m_Font->CalculateStringSize(buf, w, h);
 	Translate(static_cast<float>(w), 0.0f);
 }
 
 // Adds a copy of the null-terminated wide string at (x, y). The
 // heap-allocated copy is passed to PutString() flagged as owned.
 void CTextRenderer::Put(float x, float y, const wchar_t* buf)
 {
 	if (buf[0] == 0)
 		return; // empty string; don't bother storing
 
 	PutString(x, y, new std::wstring(buf), true);
 }
 
 // Adds the UTF-8 string at (x, y) after converting it to a wide string. The
 // heap-allocated converted copy is passed to PutString() flagged as owned.
 void CTextRenderer::Put(float x, float y, const char* buf)
 {
 	if (buf[0] == 0)
 		return; // empty string; don't bother storing
 
 	PutString(x, y, new std::wstring(wstring_from_utf8(buf)), true);
 }
 
 // Adds the string at (x, y) without copying it: the pointer is passed to
 // PutString() flagged as not owned.
 // NOTE(review): the caller presumably has to keep *buf alive until Render()
 // has run - confirm against the header documentation.
 void CTextRenderer::Put(float x, float y, const std::wstring* buf)
 {
 	if (buf->empty())
 		return; // empty string; don't bother storing
 
 	PutString(x, y, buf, false);
 }
 
 // Queues a run of text at (x, y) in the latest batch, starting a new batch
 // first if any state changed since the last one. Text is dropped when there
 // is no valid font or when it lies entirely above or below the clipping
 // rectangle.
 //
 // If `owned` is true, the string was allocated for this call and this
 // function is responsible for it: it is either recorded in the run or
 // deleted here when the text is dropped.
 void CTextRenderer::PutString(float x, float y, const std::wstring* buf, bool owned)
 {
 	// On the drop paths nothing else ever sees the pointer, so an owned
 	// string must be released here or it leaks.
 	const auto discard = [buf, owned]()
 	{
 		if (owned)
 			delete buf;
 	};
 
 	if (!m_Font)
 	{
 		discard();
 		return; // invalid font; can't render
 	}
 
 	if (m_Clipping != CRect())
 	{
 		float x0, y0, x1, y1;
 		m_Font->GetGlyphBounds(x0, y0, x1, y1);
 		if (y + y1 < m_Clipping.top || y + y0 > m_Clipping.bottom)
 		{
 			discard();
 			return;
 		}
 	}
 
 	// If any state has changed since the last batch, start a new batch.
 	// Also start one when no batch exists: Render() clears the list without
 	// touching m_Dirty, and calling back() on an empty list is undefined.
 	if (m_Dirty || m_Batches.empty())
 	{
 		SBatch batch;
 		batch.chars = 0;
 		batch.translate = m_Translate;
 		batch.color = m_Color;
 		batch.font = m_Font;
 		m_Batches.push_back(batch);
 		m_Dirty = false;
 	}
 
 	// Push a new run onto the latest batch.
 	// The text pointer and ownership flag are set on the stored copy, after
 	// push_back, exactly as before (presumably so the local `run` never
 	// carries ownership - confirm against SBatchRun).
 	SBatchRun run;
 	run.x = x;
 	run.y = y;
 	m_Batches.back().runs.push_back(run);
 	m_Batches.back().runs.back().text = buf;
 	m_Batches.back().runs.back().owned = owned;
 	m_Batches.back().chars += buf->size();
 }
 
 
 // Vertex layout used by CTextRenderer::Render(): a float texture coordinate
 // pair followed by a 16-bit integer position. All fields start at zero.
 struct t2f_v2i
 {
 	t2f_v2i() : u(0), v(0), x(0), y(0) { }
 	float u, v; // texture coordinates
 	i16 x, y; // position
 };
 
 // Ordering for sorting batches by font, so batches sharing a font end up
 // adjacent. Batches with the same font compare equivalent.
 struct SBatchCompare
 {
 	// const-qualified so the comparator can also be called through a const
 	// object, as comparators are generally expected to allow.
 	bool operator()(const CTextRenderer::SBatch& a, const CTextRenderer::SBatch& b) const
 	{
 		if (a.font != b.font)
 			return a.font < b.font;
 		// TODO: is it worth sorting by color/translate too?
 		return false;
 	}
 };
 
 // Draws all queued batches with the given shader and then clears the queue.
 //
 // `transform` is the base transform. A batch with a non-zero translation is
 // drawn with `transform * translation`; untranslated batches are drawn with
 // the base transform, which is also what the transform uniform holds on
 // return if it was changed.
 // NOTE(review): the transform uniform is only written here when a batch is
 // translated, so the caller presumably sets it to `transform` beforehand.
 void CTextRenderer::Render(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, const CMatrix3D& transform)
 {
 	// Scratch buffers reused for every flush.
 	std::vector<u16> indexes;
 	std::vector<t2f_v2i> vertexes;
 
 	// Try to merge non-consecutive batches that share the same font/color/translate:
 	// sort the batch list by font, then merge the runs of adjacent compatible batches
 	m_Batches.sort(SBatchCompare());
 	for (std::list<SBatch>::iterator it = m_Batches.begin(); it != m_Batches.end(); )
 	{
 		std::list<SBatch>::iterator next = std::next(it);
 		if (next != m_Batches.end() && it->chars + next->chars <= MAX_CHAR_COUNT_PER_BATCH && it->font == next->font && it->color == next->color && it->translate == next->translate)
 		{
 			it->chars += next->chars;
 			it->runs.splice(it->runs.end(), next->runs);
 			m_Batches.erase(next);
 		}
 		else
 			++it;
 	}
 
 	// True while the transform uniform holds something other than `transform`.
 	bool transformChanged = false;
 
 	CTexture* lastTexture = nullptr;
 	for (std::list<SBatch>::iterator it = m_Batches.begin(); it != m_Batches.end(); ++it)
 	{
 		SBatch& batch = *it;
 
 		const CFont::GlyphMap& glyphs = batch.font->GetGlyphs();
 
 		// Only rebind when the font texture differs from the previous batch's.
 		if (lastTexture != batch.font->GetTexture().get())
 		{
 			lastTexture = batch.font->GetTexture().get();
 			lastTexture->UploadBackendTextureIfNeeded(deviceCommandContext);
 			shader->BindTexture(str_tex, lastTexture->GetBackendTexture());
 		}
 
 		if (batch.translate.X != 0.0f || batch.translate.Y != 0.0f)
 		{
 			CMatrix3D translation;
 			translation.SetTranslation(batch.translate.X, batch.translate.Y, 0.0f);
 			shader->Uniform(str_transform, transform * translation);
 			transformChanged = true;
 		}
 		else if (transformChanged)
 		{
 			// An earlier batch replaced the transform uniform; restore the base
 			// transform so this untranslated batch is not drawn with a stale offset.
 			shader->Uniform(str_transform, transform);
 			transformChanged = false;
 		}
 
 		// ALPHA-only textures will have .rgb sampled as 0, so we need to
 		// replace it with white (but not affect RGBA textures)
 		if (batch.font->HasRGB())
 			shader->Uniform(str_colorAdd, CColor(0.0f, 0.0f, 0.0f, 0.0f));
 		else
 			shader->Uniform(str_colorAdd, CColor(batch.color.r, batch.color.g, batch.color.b, 0.0f));
 
 		shader->Uniform(str_colorMul, batch.color);
 
 		// Room for at most one flush worth of chars: 4 vertices and 6 indices each.
 		vertexes.resize(std::min(MAX_CHAR_COUNT_PER_BATCH, batch.chars) * 4);
 		indexes.resize(std::min(MAX_CHAR_COUNT_PER_BATCH, batch.chars) * 6);
 
 		// Number of chars written to the scratch buffers since the last flush.
 		size_t idx = 0;
 
 		// Submits the chars accumulated so far as one indexed draw and resets the counter.
-		auto flush = [&idx, &vertexes, &indexes, &shader]() -> void {
+		auto flush = [deviceCommandContext, &idx, &vertexes, &indexes, &shader]() -> void {
 			if (idx == 0)
 				return;
 			shader->VertexPointer(2, GL_SHORT, sizeof(t2f_v2i), &vertexes[0].x);
 			shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, sizeof(t2f_v2i), &vertexes[0].u);
 
-			glDrawElements(GL_TRIANGLES, idx * 6, GL_UNSIGNED_SHORT, &indexes[0]);
+			deviceCommandContext->SetIndexBufferData(indexes.data());
+			deviceCommandContext->DrawIndexed(0, idx * 6, 0);
 			idx = 0;
 		};
 
 		for (std::list<SBatchRun>::iterator runit = batch.runs.begin(); runit != batch.runs.end(); ++runit)
 		{
 			SBatchRun& run = *runit;
 			// Pen position, converted to the 16-bit vertex coordinate type.
 			i16 x = run.x;
 			i16 y = run.y;
 			for (size_t i = 0; i < run.text->size(); ++i)
 			{
 				const CFont::GlyphData* g = glyphs.get((*run.text)[i]);
 
 				if (!g)
 					g = glyphs.get(0xFFFD); // Use the missing glyph symbol
 				if (!g) // Missing the missing glyph symbol - give up
 					continue;
 
 				// One quad per glyph.
 				vertexes[idx*4].u = g->u1;
 				vertexes[idx*4].v = g->v0;
 				vertexes[idx*4].x = g->x1 + x;
 				vertexes[idx*4].y = g->y0 + y;
 
 				vertexes[idx*4+1].u = g->u0;
 				vertexes[idx*4+1].v = g->v0;
 				vertexes[idx*4+1].x = g->x0 + x;
 				vertexes[idx*4+1].y = g->y0 + y;
 
 				vertexes[idx*4+2].u = g->u0;
 				vertexes[idx*4+2].v = g->v1;
 				vertexes[idx*4+2].x = g->x0 + x;
 				vertexes[idx*4+2].y = g->y1 + y;
 
 				vertexes[idx*4+3].u = g->u1;
 				vertexes[idx*4+3].v = g->v1;
 				vertexes[idx*4+3].x = g->x1 + x;
 				vertexes[idx*4+3].y = g->y1 + y;
 
 				// Two triangles per quad: (0, 1, 2) and (2, 3, 0).
 				indexes[idx*6+0] = static_cast<u16>(idx*4+0);
 				indexes[idx*6+1] = static_cast<u16>(idx*4+1);
 				indexes[idx*6+2] = static_cast<u16>(idx*4+2);
 				indexes[idx*6+3] = static_cast<u16>(idx*4+2);
 				indexes[idx*6+4] = static_cast<u16>(idx*4+3);
 				indexes[idx*6+5] = static_cast<u16>(idx*4+0);
 
 				x += g->xadvance;
 
 				// Flush as soon as the scratch buffers are full.
 				++idx;
 				if (idx == MAX_CHAR_COUNT_PER_BATCH)
 					flush();
 			}
 		}
 
 		// Draw whatever is left of this batch.
 		flush();
 	}
 
 	m_Batches.clear();
 
 	// Leave the transform uniform at the base transform.
 	if (transformChanged)
 		shader->Uniform(str_transform, transform);
 }
Index: ps/trunk/source/gui/ObjectTypes/CMiniMap.cpp
===================================================================
--- ps/trunk/source/gui/ObjectTypes/CMiniMap.cpp	(revision 26524)
+++ ps/trunk/source/gui/ObjectTypes/CMiniMap.cpp	(revision 26525)
@@ -1,467 +1,471 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "CMiniMap.h"
 
 #include "graphics/Canvas2D.h"
 #include "graphics/GameView.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/MiniMapTexture.h"
 #include "graphics/MiniPatch.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/ShaderProgramPtr.h"
 #include "graphics/Terrain.h"
 #include "graphics/TerrainTextureEntry.h"
 #include "graphics/TerrainTextureManager.h"
 #include "graphics/TextureManager.h"
 #include "gui/CGUI.h"
 #include "gui/GUIManager.h"
 #include "gui/GUIMatrix.h"
 #include "lib/bits.h"
 #include "lib/external_libraries/libsdl.h"
 #include "lib/ogl.h"
 #include "lib/timer.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/ConfigDB.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Filesystem.h"
 #include "ps/Game.h"
 #include "ps/GameSetup/Config.h"
 #include "ps/Profile.h"
 #include "ps/World.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/WaterManager.h"
 #include "scriptinterface/Object.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/components/ICmpMinimap.h"
 #include "simulation2/components/ICmpRangeManager.h"
 #include "simulation2/helpers/Los.h"
 #include "simulation2/system/ParamNode.h"
 
 #include <array>
 #include <cmath>
 #include <vector>
 
 namespace
 {
 
 // Adds segments pieces lying inside the circle to lines.
 void CropPointsByCircle(const std::array<CVector3D, 4>& points, const CVector3D& center, const float radius, std::vector<CVector3D>* lines)
 {
 	constexpr float EPS = 1e-3f;
 	lines->reserve(points.size() * 2);
 	for (size_t idx = 0; idx < points.size(); ++idx)
 	{
 		const CVector3D& currentPoint = points[idx];
 		const CVector3D& nextPoint = points[(idx + 1) % points.size()];
 		const CVector3D direction = (nextPoint - currentPoint).Normalized();
 		const CVector3D normal(direction.Z, 0.0f, -direction.X);
 		const float offset = normal.Dot(currentPoint) - normal.Dot(center);
 		// We need to have lines only inside the circle.
 		if (std::abs(offset) + EPS >= radius)
 			continue;
 		const CVector3D closestPoint = center + normal * offset;
 		const float halfChordLength = sqrt(radius * radius - offset * offset);
 		const CVector3D intersectionA = closestPoint - direction * halfChordLength;
 		const CVector3D intersectionB = closestPoint + direction * halfChordLength;
 		// We have no intersection if the segment is lying outside of the circle.
 		if (direction.Dot(currentPoint) + EPS > direction.Dot(intersectionB) ||
 		    direction.Dot(nextPoint) - EPS < direction.Dot(intersectionA))
 			continue;
 
 		lines->emplace_back(
 			direction.Dot(currentPoint) > direction.Dot(intersectionA) ? currentPoint : intersectionA);
 		lines->emplace_back(
 			direction.Dot(nextPoint) < direction.Dot(intersectionB) ? nextPoint : intersectionB);
 	}
 }
 
-void DrawTexture(CShaderProgramPtr shader, float angle, float x, float y, float x2, float y2, float mapScale)
+void DrawTexture(
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, // Context the draw call is issued through.
+	const CShaderProgramPtr& shader, float angle, float x, float y, float x2, float y2, float mapScale) // Quad spans (x, y)-(x2, y2); angle and mapScale rotate and scale its texture coordinates.
 {
 	// Rotate the texture coordinates (0,0)-(coordMax,coordMax) around their center point (m,m)
 	// Scale square maps to fit in circular minimap area
 	const float s = sin(angle) * mapScale;
 	const float c = cos(angle) * mapScale;
 	const float m = 0.5f;
 
 	float quadTex[] = {
 		m*(-c + s + 1.f), m*(-c + -s + 1.f),
 		m*(c + s + 1.f), m*(-c + s + 1.f),
 		m*(c + -s + 1.f), m*(c + s + 1.f),
 
 		m*(c + -s + 1.f), m*(c + s + 1.f),
 		m*(-c + -s + 1.f), m*(c + -s + 1.f),
 		m*(-c + s + 1.f), m*(-c + -s + 1.f)
 	};
 	float quadVerts[] = {
 		x, y, 0.0f,
 		x2, y, 0.0f,
 		x2, y2, 0.0f,
 
 		x2, y2, 0.0f,
 		x, y2, 0.0f,
 		x, y, 0.0f
 	};
 
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadTex);
 	shader->VertexPointer(3, GL_FLOAT, 0, quadVerts);
 	shader->AssertPointersBound();
 
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6); // 6 vertices: the two triangles listed in quadVerts/quadTex.
 }
 
 } // anonymous namespace
 
 const CStr CMiniMap::EventNameWorldClick = "WorldClick";
 
 CMiniMap::CMiniMap(CGUI& pGUI) :
 	IGUIObject(pGUI),
 	m_MapSize(0), m_MapScale(1.f), m_Mask(this, "mask", false),
 	m_FlareTextureCount(this, "flare_texture_count", 0), m_FlareRenderSize(this, "flare_render_size", 0),
 	m_FlareInterleave(this, "flare_interleave", false), m_FlareAnimationSpeed(this, "flare_animation_speed", 0.0f),
 	m_FlareLifetimeSeconds(this, "flare_lifetime_seconds", 0.0f),
 	m_FlareStartFadeSeconds(this, "flare_start_fade_seconds", 0.0f),
 	m_FlareStopFadeSeconds(this, "flare_stop_fade_seconds", 0.0f)
 {
 	m_Clicking = false;
 	m_MouseHovering = false;
 }
 
 CMiniMap::~CMiniMap() = default;
 
 void CMiniMap::HandleMessage(SGUIMessage& Message)
 {
 	IGUIObject::HandleMessage(Message);
 	switch (Message.type)
 	{
 	case GUIM_LOAD:
 		RecreateFlareTextures();
 		break;
 	case GUIM_SETTINGS_UPDATED:
 		if (Message.value == "flare_texture_count")
 			RecreateFlareTextures();
 		break;
 	case GUIM_MOUSE_PRESS_LEFT:
 		if (m_MouseHovering)
 		{
 			if (!CMiniMap::FireWorldClickEvent(SDL_BUTTON_LEFT, 1))
 			{
 				SetCameraPositionFromMousePosition();
 				m_Clicking = true;
 			}
 		}
 		break;
 	case GUIM_MOUSE_RELEASE_LEFT:
 		if (m_MouseHovering && m_Clicking)
 			SetCameraPositionFromMousePosition();
 		m_Clicking = false;
 		break;
 	case GUIM_MOUSE_DBLCLICK_LEFT:
 		if (m_MouseHovering && m_Clicking)
 			SetCameraPositionFromMousePosition();
 		m_Clicking = false;
 		break;
 	case GUIM_MOUSE_ENTER:
 		m_MouseHovering = true;
 		break;
 	case GUIM_MOUSE_LEAVE:
 		m_Clicking = false;
 		m_MouseHovering = false;
 		break;
 	case GUIM_MOUSE_RELEASE_RIGHT:
 		CMiniMap::FireWorldClickEvent(SDL_BUTTON_RIGHT, 1);
 		break;
 	case GUIM_MOUSE_DBLCLICK_RIGHT:
 		CMiniMap::FireWorldClickEvent(SDL_BUTTON_RIGHT, 2);
 		break;
 	case GUIM_MOUSE_MOTION:
 		if (m_MouseHovering && m_Clicking)
 			SetCameraPositionFromMousePosition();
 		break;
 	case GUIM_MOUSE_WHEEL_DOWN:
 	case GUIM_MOUSE_WHEEL_UP:
 		Message.Skip();
 		break;
 
 	default:
 		break;
 	}
 }
 
 void CMiniMap::RecreateFlareTextures()
 {
 	// Catch invalid values.
 	if (m_FlareTextureCount > 99)
 	{
 		LOGERROR("Invalid value for flare texture count. Valid range is 0-99.");
 		return;
 	}
 	const CStr textureNumberingFormat = "art/textures/animated/minimap-flare/frame%02u.png";
 	m_FlareTextures.clear();
 	m_FlareTextures.reserve(m_FlareTextureCount);
 	for (u32 i = 0; i < m_FlareTextureCount; ++i)
 	{
 		CTextureProperties textureProps(fmt::sprintf(textureNumberingFormat, i).c_str());
 		textureProps.SetIgnoreQuality(true);
 		m_FlareTextures.emplace_back(g_Renderer.GetTextureManager().CreateTexture(textureProps));
 	}
 }
 
 bool CMiniMap::IsMouseOver() const
 {
 	const CVector2D& mousePos = m_pGUI.GetMousePos();
 	// Take the magnitude of the difference of the mouse position and minimap center.
 	const float distanceFromCenter = (mousePos - m_CachedActualSize.CenterPoint()).Length();
 	// If the distance is less then the radius of the minimap (half the width) the mouse is over the minimap.
 	return distanceFromCenter < m_CachedActualSize.GetWidth() / 2.0;
 }
 
 void CMiniMap::GetMouseWorldCoordinates(float& x, float& z) const
 {
 	// Determine X and Z according to proportion of mouse position and minimap.
 	const CVector2D& mousePos = m_pGUI.GetMousePos();
 
 	float px = (mousePos.X - m_CachedActualSize.left) / m_CachedActualSize.GetWidth();
 	float py = (m_CachedActualSize.bottom - mousePos.Y) / m_CachedActualSize.GetHeight();
 
 	float angle = GetAngle();
 
 	// Scale world coordinates for shrunken square map
 	x = TERRAIN_TILE_SIZE * m_MapSize * (m_MapScale * (cos(angle)*(px-0.5) - sin(angle)*(py-0.5)) + 0.5);
 	z = TERRAIN_TILE_SIZE * m_MapSize * (m_MapScale * (cos(angle)*(py-0.5) + sin(angle)*(px-0.5)) + 0.5);
 }
 
 void CMiniMap::SetCameraPositionFromMousePosition()
 {
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 
 	CVector3D target;
 	GetMouseWorldCoordinates(target.X, target.Z);
 	target.Y = terrain->GetExactGroundLevel(target.X, target.Z);
 	g_Game->GetView()->MoveCameraTarget(target);
 }
 
 float CMiniMap::GetAngle() const
 {
 	CVector3D cameraIn = g_Game->GetView()->GetCamera()->GetOrientation().GetIn();
 	return -atan2(cameraIn.X, cameraIn.Z);
 }
 
 CVector2D CMiniMap::WorldSpaceToMiniMapSpace(const CVector3D& worldPosition) const
 {
 	// Coordinates with 0,0 in the middle of the minimap and +-0.5 as max.
 	const float invTileMapSize = 1.0f / static_cast<float>(TERRAIN_TILE_SIZE * m_MapSize);
 	const float relativeX = (worldPosition.X * invTileMapSize - 0.5) / m_MapScale;
 	const float relativeY = (worldPosition.Z * invTileMapSize - 0.5) / m_MapScale;
 
 	// Rotate coordinates.
 	const float angle = GetAngle();
 	const float rotatedX = cos(angle) * relativeX + sin(angle) * relativeY;
 	const float rotatedY = -sin(angle) * relativeX + cos(angle) * relativeY;
 
 	// Calculate coordinates in GUI space.
 	return CVector2D(
 		m_CachedActualSize.left + (0.5f + rotatedX) * m_CachedActualSize.GetWidth(),
 		m_CachedActualSize.bottom - (0.5f + rotatedY) * m_CachedActualSize.GetHeight());
 }
 
 bool CMiniMap::FireWorldClickEvent(int button, int UNUSED(clicks))
 {
 	ScriptRequest rq(g_GUI->GetActiveGUI()->GetScriptInterface());
 
 	float x, z;
 	GetMouseWorldCoordinates(x, z);
 
 	JS::RootedValue coords(rq.cx);
 	Script::CreateObject(rq, &coords, "x", x, "z", z);
 
 	JS::RootedValue buttonJs(rq.cx);
 	Script::ToJSVal(rq, &buttonJs, button);
 
 	JS::RootedValueVector paramData(rq.cx);
 	ignore_result(paramData.append(coords));
 	ignore_result(paramData.append(buttonJs));
 
 	return ScriptEventWithReturn(EventNameWorldClick, paramData);
 }
 
 // This sets up and draws the rectangle on the minimap
 //  which represents the view of the camera in the world.
 void CMiniMap::DrawViewRect(CCanvas2D& canvas) const
 {
 	// Compute the camera frustum intersected with a fixed-height plane.
 	// Use the water height as a fixed base height, which should be the lowest we can go
 	const float sampleHeight = g_Renderer.GetSceneRenderer().GetWaterManager().m_WaterHeight;
 
 	const CCamera* camera = g_Game->GetView()->GetCamera();
 	const std::array<CVector3D, 4> hitPoints = {
 		camera->GetWorldCoordinates(0, g_Renderer.GetHeight(), sampleHeight),
 		camera->GetWorldCoordinates(g_Renderer.GetWidth(), g_Renderer.GetHeight(), sampleHeight),
 		camera->GetWorldCoordinates(g_Renderer.GetWidth(), 0, sampleHeight),
 		camera->GetWorldCoordinates(0, 0, sampleHeight)
 	};
 
 	std::vector<CVector3D> worldSpaceLines;
 	// We need to prevent drawing view bounds out of the map.
 	const float halfMapSize = static_cast<float>((m_MapSize - 1) * TERRAIN_TILE_SIZE) * 0.5f;
 	CropPointsByCircle(hitPoints, CVector3D(halfMapSize, 0.0f, halfMapSize), halfMapSize * m_MapScale, &worldSpaceLines);
 	if (worldSpaceLines.empty())
 		return;
 
 	for (size_t index = 0; index < worldSpaceLines.size() && index + 1 < worldSpaceLines.size(); index += 2)
 	{
 		const CVector2D from = WorldSpaceToMiniMapSpace(worldSpaceLines[index]);
 		const CVector2D to = WorldSpaceToMiniMapSpace(worldSpaceLines[index + 1]);
 		canvas.DrawLine({from, to}, 2.0f, CColor(1.0f, 0.3f, 0.3f, 1.0f));
 	}
 }
 
 void CMiniMap::DrawFlare(CCanvas2D& canvas, const MapFlare& flare, double currentTime) const
 {
 	if (m_FlareTextures.empty())
 		return;
 
 	const CVector2D flareCenter = WorldSpaceToMiniMapSpace(CVector3D(flare.pos.X, 0.0f, flare.pos.Y));
 
 	const CRect destination(
 		flareCenter.X - m_FlareRenderSize, flareCenter.Y - m_FlareRenderSize,
 		flareCenter.X + m_FlareRenderSize, flareCenter.Y + m_FlareRenderSize);
 
 	const double deltaTime = currentTime - flare.time;
 	const double remainingTime = m_FlareLifetimeSeconds - deltaTime;
 	const u32 flooredStep = floor(deltaTime * m_FlareAnimationSpeed);
 
 	const float startFadeAlpha = m_FlareStartFadeSeconds > 0.0f ? deltaTime / m_FlareStartFadeSeconds : 1.0f;
 	const float stopFadeAlpha = m_FlareStopFadeSeconds > 0.0f ? remainingTime / m_FlareStopFadeSeconds : 1.0f;
 	const float alpha = Clamp(std::min(
 		SmoothStep(0.0f, 1.0f, startFadeAlpha), SmoothStep(0.0f, 1.0f, stopFadeAlpha)),
 		0.0f, 1.0f);
 
 	DrawFlareFrame(canvas, flooredStep % m_FlareTextures.size(), destination, flare.color, alpha);
 
 	// Draw a second circle if the first has reached half of the animation.
 	if (m_FlareInterleave && flooredStep >= m_FlareTextures.size() / 2)
 	{
 		DrawFlareFrame(canvas, (flooredStep - m_FlareTextures.size() / 2) % m_FlareTextures.size(),
 			destination, flare.color, alpha);
 	}
 }
 
 void CMiniMap::DrawFlareFrame(CCanvas2D& canvas, const u32 frameIndex,
 	const CRect& destination, const CColor& color, float alpha) const
 {
 	// TODO: Only draw inside the minimap circle.
 	CTexturePtr texture = m_FlareTextures[frameIndex % m_FlareTextures.size()];
 	CColor finalColor = color;
 	finalColor.a *= alpha;
 	canvas.DrawTexture(texture, destination,
 		CRect(0, 0, texture->GetWidth(), texture->GetHeight()), finalColor,
 		CColor(0.0f, 0.0f, 0.0f, 0.0f), 0.0f);
 }
 
 void CMiniMap::Draw(CCanvas2D& canvas)
 {
 	PROFILE3("render minimap");
 
 	// The terrain isn't actually initialized until the map is loaded, which
 	// happens when the game is started, so abort until then.
 	if (!g_Game || !g_Game->IsGameStarted())
 		return;
 
 	if (!m_Mask)
 		canvas.DrawRect(m_CachedActualSize, CColor(0.0f, 0.0f, 0.0f, 1.0f));
 
 	canvas.Flush();
 
 	CSimulation2* sim = g_Game->GetSimulation2();
 	CmpPtr<ICmpRangeManager> cmpRangeManager(*sim, SYSTEM_ENTITY);
 	ENSURE(cmpRangeManager);
 
 	// Set our globals in case they hadn't been set before
 	const CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	m_MapSize = terrain->GetVerticesPerSide();
 	m_MapScale = (cmpRangeManager->GetLosCircular() ? 1.f : 1.414f);
 
 	// Draw the main textured quad
 	CMiniMapTexture& miniMapTexture = g_Game->GetView()->GetMiniMapTexture();
 	if (miniMapTexture.GetTexture())
 	{
 		CShaderProgramPtr shader;
 		CShaderTechniquePtr tech;
 
 		CShaderDefines baseDefines;
 		baseDefines.Add(str_MINIMAP_BASE, str_1);
 
 		tech = g_Renderer.GetShaderManager().LoadEffect(str_minimap, baseDefines);
 		Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 			tech->GetGraphicsPipelineStateDesc();
 		pipelineStateDesc.blendState.enabled = true;
 		pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::SRC_ALPHA;
 		pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 		pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 			Renderer::Backend::BlendOp::ADD;
 		tech->BeginPass();
-		g_Renderer.GetDeviceCommandContext()->SetGraphicsPipelineState(pipelineStateDesc);
+		Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
+			g_Renderer.GetDeviceCommandContext(); // Fetched once; reused for the pipeline state and the quad draw below.
+		deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc); // Applies the alpha-blended state configured above.
 		shader = tech->GetShader();
 
 		shader->BindTexture(str_baseTex, miniMapTexture.GetTexture());
 		const CMatrix3D baseTransform = GetDefaultGuiMatrix();
 		CMatrix3D baseTextureTransform;
 		baseTextureTransform.SetIdentity();
 		shader->Uniform(str_transform, baseTransform);
 		shader->Uniform(str_textureTransform, baseTextureTransform);
 
 		const float x = m_CachedActualSize.left, y = m_CachedActualSize.bottom;
 		const float x2 = m_CachedActualSize.right, y2 = m_CachedActualSize.top;
 		const float angle = GetAngle();
-		DrawTexture(shader, angle, x, y, x2, y2, m_MapScale);
+		DrawTexture(deviceCommandContext, shader, angle, x, y, x2, y2, m_MapScale); // Draw call now goes through the command context instead of raw GL.
 
 		tech->EndPass();
 	}
 
 	PROFILE_START("minimap flares");
 
 	DrawViewRect(canvas);
 
 	const double currentTime = timer_Time();
 	while (!m_MapFlares.empty() && m_FlareLifetimeSeconds + m_MapFlares.front().time < currentTime)
 		m_MapFlares.pop_front();
 
 	for (const MapFlare& flare : m_MapFlares)
 		DrawFlare(canvas, flare, currentTime);
 
 	PROFILE_END("minimap flares");
 }
 
 bool CMiniMap::Flare(const CVector2D& pos, const CStr& colorStr)
 {
 	CColor color;
 	if (!color.ParseString(colorStr))
 	{
 		LOGERROR("CMiniMap::Flare: Couldn't parse color string");
 		return false;
 	}
 	m_MapFlares.push_back({ pos, color, timer_Time() });
 	return true;
 }
Index: ps/trunk/source/renderer/DebugRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/DebugRenderer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/DebugRenderer.cpp	(revision 26525)
@@ -1,368 +1,381 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/DebugRenderer.h"
 
 #include "graphics/Camera.h"
 #include "graphics/Color.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/ShaderProgram.h"
 #include "lib/ogl.h"
 #include "maths/BoundingBoxAligned.h"
 #include "maths/Brush.h"
 #include "maths/Matrix3D.h"
 #include "maths/Vector3D.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/backend/gl/DeviceCommandContext.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 
 #include <cmath>
 
 namespace
 {
 
 void SetGraphicsPipelineStateFromTechAndColor(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderTechniquePtr& tech, const CColor& color, const bool depthTestEnabled = true,
 	const bool wireframe = false)
 {
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc = tech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthTestEnabled = depthTestEnabled;
 	if (color.a != 1.0f)
 	{
 		pipelineStateDesc.blendState.enabled = true;
 		pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::SRC_ALPHA;
 		pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 		pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 			Renderer::Backend::BlendOp::ADD;
 	}
 	else
 		pipelineStateDesc.blendState.enabled = false;
 	if (wireframe)
 		pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	pipelineStateDesc.rasterizationState.cullMode = Renderer::Backend::CullMode::NONE;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 }
 
 } // anonymous namespace
 
 void CDebugRenderer::DrawLine(
 	const CVector3D& from, const CVector3D& to, const CColor& color,
 	const float width, const bool depthTestEnabled)
 {
 	if (from == to)
 		return;
 
 	DrawLine({from, to}, color, width, depthTestEnabled);
 }
 
 void CDebugRenderer::DrawLine(
 	const std::vector<CVector3D>& line, const CColor& color,
 	const float width, const bool depthTestEnabled)
 {
 #if CONFIG2_GLES
 	UNUSED2(line); UNUSED2(color); UNUSED2(width); UNUSED2(depthTestEnabled);
 	#warning TODO: implement drawing line for GLES
 #else
 	CShaderTechniquePtr debugLineTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_debug_line);
 	debugLineTech->BeginPass();
-	SetGraphicsPipelineStateFromTechAndColor(g_Renderer.GetDeviceCommandContext(), debugLineTech, color, depthTestEnabled);
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
+		g_Renderer.GetDeviceCommandContext(); // Fetched once; reused for the pipeline state and the draw call below.
+	SetGraphicsPipelineStateFromTechAndColor(
+		deviceCommandContext, debugLineTech, color, depthTestEnabled); // wireframe keeps its default (false).
 
 	const CCamera& viewCamera = g_Renderer.GetSceneRenderer().GetViewCamera();
 
 	CShaderProgramPtr debugLineShader = debugLineTech->GetShader();
 	debugLineShader->Uniform(str_transform, viewCamera.GetViewProjection());
 	debugLineShader->Uniform(str_color, color);
 
 	const CVector3D cameraIn = viewCamera.GetOrientation().GetIn();
 
 	std::vector<float> vertices;
 	vertices.reserve(line.size() * 6 * 3);
 #define ADD(position) \
 	vertices.emplace_back((position).X); \
 	vertices.emplace_back((position).Y); \
 	vertices.emplace_back((position).Z);
 
 	for (size_t idx = 1; idx < line.size(); ++idx)
 	{
 		const CVector3D from = line[idx - 1];
 		const CVector3D to = line[idx];
 		const CVector3D direction = (to - from).Normalized();
 		const CVector3D view = direction.Dot(cameraIn) > 0.9f ?
 			CVector3D(0.0f, 1.0f, 0.0f) :
 			cameraIn;
 		const CVector3D offset = view.Cross(direction).Normalized() * width;
 
 		ADD(from + offset)
 		ADD(to - offset)
 		ADD(to + offset)
 		ADD(from + offset)
 		ADD(from - offset)
 		ADD(to - offset)
 	}
 
 #undef ADD
 
 	debugLineShader->VertexPointer(3, GL_FLOAT, 0, vertices.data());
 	debugLineShader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, vertices.size() / 3);
+	deviceCommandContext->Draw(0, vertices.size() / 3); // ADD pushes 3 floats per vertex; 6 vertices (two triangles) per segment.
 
 	debugLineTech->EndPass();
 #endif
 }
 
 void CDebugRenderer::DrawCircle(const CVector3D& origin, const float radius, const CColor& color)
 {
 #if CONFIG2_GLES
 	UNUSED2(origin); UNUSED2(radius); UNUSED2(color);
 	#warning TODO: implement drawing circle for GLES
 #else
 	CShaderTechniquePtr debugCircleTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_debug_line);
 	debugCircleTech->BeginPass();
-	SetGraphicsPipelineStateFromTechAndColor(g_Renderer.GetDeviceCommandContext(), debugCircleTech, color);
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
+		g_Renderer.GetDeviceCommandContext(); // Fetched once; reused for the pipeline state and the draw call below.
+	SetGraphicsPipelineStateFromTechAndColor(
+		deviceCommandContext, debugCircleTech, color); // depthTestEnabled and wireframe keep their defaults.
 
 	const CCamera& camera = g_Renderer.GetSceneRenderer().GetViewCamera();
 
 	CShaderProgramPtr debugCircleShader = debugCircleTech->GetShader();
 	debugCircleShader->Uniform(str_transform, camera.GetViewProjection());
 	debugCircleShader->Uniform(str_color, color);
 
 	const CVector3D cameraUp = camera.GetOrientation().GetUp();
 	const CVector3D cameraLeft = camera.GetOrientation().GetLeft();
 
 	std::vector<float> vertices;
 #define ADD(position) \
 	vertices.emplace_back((position).X); \
 	vertices.emplace_back((position).Y); \
 	vertices.emplace_back((position).Z);
 
 	constexpr size_t segments = 16;
 	for (size_t idx = 0; idx <= segments; ++idx)
 	{
 		const float angle = M_PI * 2.0f * idx / segments;
 		const CVector3D offset = cameraUp * sin(angle) - cameraLeft * cos(angle);
 		const float nextAngle = M_PI * 2.0f * (idx + 1) / segments;
 		const CVector3D nextOffset = cameraUp * sin(nextAngle) - cameraLeft * cos(nextAngle);
 		ADD(origin)
 		ADD(origin + offset * radius)
 		ADD(origin + nextOffset * radius)
 	}
 
 #undef ADD
 
 	debugCircleShader->VertexPointer(3, GL_FLOAT, 0, vertices.data());
 	debugCircleShader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLE_FAN, 0, vertices.size() / 3);
+	deviceCommandContext->Draw(0, vertices.size() / 3); // NOTE(review): replaces a GL_TRIANGLE_FAN draw; vertices are separate (origin, p, next) triples - confirm Draw() renders a triangle list.
 
 	debugCircleTech->EndPass();
 #endif
 }
 
 void CDebugRenderer::DrawCameraFrustum(const CCamera& camera, const CColor& color, int intermediates, bool wireframe)
 {
 #if CONFIG2_GLES
 	UNUSED2(camera); UNUSED2(color); UNUSED2(intermediates);
 	#warning TODO: implement camera frustum for GLES
 #else
 	CCamera::Quad nearPoints;
 	CCamera::Quad farPoints;
 
 	camera.GetViewQuad(camera.GetNearPlane(), nearPoints);
 	camera.GetViewQuad(camera.GetFarPlane(), farPoints);
 	for (int i = 0; i < 4; ++i)
 	{
 		nearPoints[i] = camera.m_Orientation.Transform(nearPoints[i]);
 		farPoints[i] = camera.m_Orientation.Transform(farPoints[i]);
 	}
 
 	CShaderTechniquePtr overlayTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_debug_line);
 	overlayTech->BeginPass();
-	SetGraphicsPipelineStateFromTechAndColor(g_Renderer.GetDeviceCommandContext(), overlayTech, color, true, wireframe);
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
+		g_Renderer.GetDeviceCommandContext(); // Fetched once; reused for the pipeline state and both draw calls below.
+	SetGraphicsPipelineStateFromTechAndColor(
+		deviceCommandContext, overlayTech, color, true, wireframe); // Depth test enabled; wireframe as requested by the caller.
 
 	CShaderProgramPtr overlayShader = overlayTech->GetShader();
 	overlayShader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	overlayShader->Uniform(str_color, color);
 
 	std::vector<float> vertices;
 #define ADD(position) \
 	vertices.emplace_back((position).X); \
 	vertices.emplace_back((position).Y); \
 	vertices.emplace_back((position).Z);
 
 	// Near plane.
 	ADD(nearPoints[0]);
 	ADD(nearPoints[1]);
 	ADD(nearPoints[2]);
 	ADD(nearPoints[0]);
 	ADD(nearPoints[2]);
 	ADD(nearPoints[3]);
 
 	// Far plane.
 	ADD(farPoints[0]);
 	ADD(farPoints[1]);
 	ADD(farPoints[2]);
 	ADD(farPoints[0]);
 	ADD(farPoints[2]);
 	ADD(farPoints[3]);
 
 	// Intermediate planes.
 	CVector3D intermediatePoints[4];
 	for (int i = 0; i < intermediates; ++i)
 	{
 		const float t = (i + 1.0f) / (intermediates + 1.0f);
 
 		for (int j = 0; j < 4; ++j)
 			intermediatePoints[j] = nearPoints[j] * t + farPoints[j] * (1.0f - t);
 
 		ADD(intermediatePoints[0]);
 		ADD(intermediatePoints[1]);
 		ADD(intermediatePoints[2]);
 		ADD(intermediatePoints[0]);
 		ADD(intermediatePoints[2]);
 		ADD(intermediatePoints[3]);
 	}
 
 	overlayShader->VertexPointer(3, GL_FLOAT, 0, vertices.data());
 	overlayShader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, vertices.size() / 3);
+	deviceCommandContext->Draw(0, vertices.size() / 3); // Near, far and intermediate quads; ADD pushes 3 floats per vertex.
 
 	vertices.clear();
 
 	// Connection lines.
 	for (int i = 0; i < 4; ++i)
 	{
 		const int nextI = (i + 1) % 4;
 		ADD(nearPoints[i]);
 		ADD(farPoints[nextI]);
 		ADD(farPoints[i]);
 		ADD(nearPoints[i]);
 		ADD(nearPoints[nextI]);
 		ADD(farPoints[nextI]);
 	}
 
 	overlayShader->VertexPointer(3, GL_FLOAT, 0, vertices.data());
 	overlayShader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, vertices.size() / 3);
+	deviceCommandContext->Draw(0, vertices.size() / 3); // Geometry joining the near and far quads; 6 vertices per side.
 #undef ADD
 
 	overlayTech->EndPass();
 #endif
 }
 
 void CDebugRenderer::DrawBoundingBox(
 	const CBoundingBoxAligned& boundingBox, const CColor& color,
 	bool wireframe)
 {
 	DrawBoundingBox(
 		boundingBox, color,
 		g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection(), wireframe);
 }
 
 void CDebugRenderer::DrawBoundingBox(
 	const CBoundingBoxAligned& boundingBox, const CColor& color,
 	const CMatrix3D& transform, bool wireframe)
 {
 	CShaderTechniquePtr shaderTech = g_Renderer.GetShaderManager().LoadEffect(str_solid);
 	shaderTech->BeginPass();
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
+		g_Renderer.GetDeviceCommandContext(); // Fetched once; reused for the pipeline state and the draw call below.
 	SetGraphicsPipelineStateFromTechAndColor(
-		g_Renderer.GetDeviceCommandContext(), shaderTech, color, true, wireframe);
+		deviceCommandContext, shaderTech, color, true, wireframe); // Depth test enabled; wireframe as requested by the caller.
 
 	const CShaderProgramPtr& shader = shaderTech->GetShader();
 	shader->Uniform(str_color, color);
 	shader->Uniform(str_transform, transform);
 
 	std::vector<float> data;
 
 #define ADD_FACE(x, y, z) \
 	ADD_PT(0, 0, x, y, z); ADD_PT(1, 0, x, y, z); ADD_PT(1, 1, x, y, z); \
 	ADD_PT(1, 1, x, y, z); ADD_PT(0, 1, x, y, z); ADD_PT(0, 0, x, y, z);
 #define ADD_PT(u_, v_, x, y, z) \
 	STMT(int u = u_; int v = v_; \
 		data.push_back(boundingBox[x].X); \
 		data.push_back(boundingBox[y].Y); \
 		data.push_back(boundingBox[z].Z); \
 	)
 
 	ADD_FACE(u, v, 0);
 	ADD_FACE(0, u, v);
 	ADD_FACE(u, 0, 1-v);
 	ADD_FACE(u, 1-v, 1);
 	ADD_FACE(1, u, 1-v);
 	ADD_FACE(u, 1, v);
 
 #undef ADD_FACE
 
 	shader->VertexPointer(3, GL_FLOAT, 3 * sizeof(float), data.data());
 
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6*6);
+	deviceCommandContext->Draw(0, 6*6); // 6 ADD_FACE calls, each emitting 6 vertices (two triangles).
 
 	shaderTech->EndPass();
 }
 
 void CDebugRenderer::DrawBrush(const CBrush& brush, const CColor& color, bool wireframe)
 {
 	CShaderTechniquePtr shaderTech = g_Renderer.GetShaderManager().LoadEffect(str_solid);
 	shaderTech->BeginPass();
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
+		g_Renderer.GetDeviceCommandContext(); // Fetched once; reused for the pipeline state and the draw call below.
 	SetGraphicsPipelineStateFromTechAndColor(
-		g_Renderer.GetDeviceCommandContext(), shaderTech, color, true, wireframe);
+		deviceCommandContext, shaderTech, color, true, wireframe); // Depth test enabled; wireframe as requested by the caller.
 
 	const CShaderProgramPtr& shader = shaderTech->GetShader();
 	shader->Uniform(str_color, color);
 	shader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 
 	std::vector<float> data;
 
 	std::vector<std::vector<size_t>> faces;
 	brush.GetFaces(faces);
 
 #define ADD_VERT(a) \
 	STMT( \
 		data.push_back(brush.GetVertices()[faces[i][a]].X); \
 		data.push_back(brush.GetVertices()[faces[i][a]].Y); \
 		data.push_back(brush.GetVertices()[faces[i][a]].Z); \
 	)
 
 	for (size_t i = 0; i < faces.size(); ++i)
 	{
 		// Triangulate into (0,1,2), (0,2,3), ...
 		for (size_t j = 1; j < faces[i].size() - 2; ++j)
 		{
 			ADD_VERT(0);
 			ADD_VERT(j);
 			ADD_VERT(j+1);
 		}
 	}
 
 #undef ADD_VERT
 
 	shader->VertexPointer(3, GL_FLOAT, 3 * sizeof(float), data.data());
 
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, data.size() / 5);
+	deviceCommandContext->Draw(0, data.size() / 3); // ADD_VERT pushes 3 floats (X, Y, Z) per vertex, matching the 3-float stride above; dividing by 5 dropped vertices.
 
 	shaderTech->EndPass();
 }
 
Index: ps/trunk/source/renderer/DecalRData.cpp
===================================================================
--- ps/trunk/source/renderer/DecalRData.cpp	(revision 26524)
+++ ps/trunk/source/renderer/DecalRData.cpp	(revision 26525)
@@ -1,331 +1,331 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "DecalRData.h"
 
 #include "graphics/Decal.h"
 #include "graphics/Model.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "lib/allocators/DynamicArena.h"
 #include "lib/allocators/STLAllocators.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "renderer/Renderer.h"
 #include "renderer/TerrainRenderer.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/Simulation2.h"
 
 #include <algorithm>
 
 // TODO: Currently each decal is a separate CDecalRData. We might want to use
 // lots of decals for special effects like shadows, footprints, etc, in which
 // case we should probably redesign this to batch them all together for more
 // efficient rendering.
 
 namespace
 {
 
 struct SDecalBatch
 {
 	CDecalRData* decal;
 	CShaderTechniquePtr shaderTech;
 	CVertexBuffer::VBChunk* vertices;
 	CVertexBuffer::VBChunk* indices;
 };
 
 struct SDecalBatchComparator
 {
 	bool operator()(const SDecalBatch& lhs, const SDecalBatch& rhs) const
 	{
 		if (lhs.shaderTech != rhs.shaderTech)
 			return lhs.shaderTech < rhs.shaderTech;
 		if (lhs.vertices->m_Owner != rhs.vertices->m_Owner)
 			return lhs.vertices->m_Owner < rhs.vertices->m_Owner;
 		if (lhs.indices->m_Owner != rhs.indices->m_Owner)
 			return lhs.indices->m_Owner < rhs.indices->m_Owner;
 		return lhs.decal < rhs.decal;
 	}
 };
 
 } // anonymous namespace
 
 CDecalRData::CDecalRData(CModelDecal* decal, CSimulation2* simulation)
 	: m_Decal(decal), m_Simulation(simulation)
 {
 	BuildVertexData();
 }
 
 CDecalRData::~CDecalRData() = default;
 
 void CDecalRData::Update(CSimulation2* simulation)
 {
 	m_Simulation = simulation;
 	if (m_UpdateFlags != 0)
 	{
 		BuildVertexData();
 		m_UpdateFlags = 0;
 	}
 }
 
 void CDecalRData::RenderDecals(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const std::vector<CDecalRData*>& decals, const CShaderDefines& context, ShadowMap* shadow)
 {
 	PROFILE3("render terrain decals");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain decals");
 
 	using Arena = Allocators::DynamicArena<256 * KiB>;
 
 	Arena arena;
 
 	using Batches = std::vector<SDecalBatch, ProxyAllocator<SDecalBatch, Arena>>;
 	Batches batches((Batches::allocator_type(arena)));
 	batches.reserve(decals.size());
 
 	CShaderDefines contextDecal = context;
 	contextDecal.Add(str_DECAL, str_1);
 
 	for (CDecalRData* decal : decals)
 	{
 		CMaterial &material = decal->m_Decal->m_Decal.m_Material;
 
 		if (material.GetShaderEffect().empty())
 		{
 			LOGERROR("Terrain renderer failed to load shader effect.\n");
 			continue;
 		}
 
 		CShaderDefines defines = contextDecal;
 		defines.SetMany(material.GetShaderDefines(0));
 		CShaderTechniquePtr techBase = g_Renderer.GetShaderManager().LoadEffect(
 			material.GetShaderEffect(), defines);
 		if (!techBase)
 		{
 			LOGERROR("Terrain renderer failed to load shader effect (%s)\n",
 					material.GetShaderEffect().string().c_str());
 			continue;
 		}
 
 		if (material.GetSamplers().empty() || !decal->m_VBDecals || !decal->m_VBDecalsIndices)
 			continue;
 
 		SDecalBatch batch;
 		batch.decal = decal;
 		batch.shaderTech = techBase;
 		batch.vertices = decal->m_VBDecals.Get();
 		batch.indices = decal->m_VBDecalsIndices.Get();
 
 		batches.emplace_back(std::move(batch));
 	}
 
 	if (batches.empty())
 		return;
 
 	std::sort(batches.begin(), batches.end(), SDecalBatchComparator());
 
 	CVertexBuffer* lastIB = nullptr;
 	for (auto itTechBegin = batches.begin(), itTechEnd = batches.begin(); itTechBegin != batches.end(); itTechBegin = itTechEnd)
 	{
 		while (itTechEnd != batches.end() && itTechBegin->shaderTech == itTechEnd->shaderTech)
 			++itTechEnd;
 
 		const CShaderTechniquePtr& techBase = itTechBegin->shaderTech;
 		const int numPasses = techBase->GetNumPasses();
 
 		for (int pass = 0; pass < numPasses; ++pass)
 		{
 			Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 				techBase->GetGraphicsPipelineStateDesc(pass);
 			pipelineStateDesc.blendState.enabled = true;
 			pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::SRC_ALPHA;
 			pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 			pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 				Renderer::Backend::BlendOp::ADD;
 			techBase->BeginPass(pass);
 			deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 			const CShaderProgramPtr& shader = techBase->GetShader(pass);
 			TerrainRenderer::PrepareShader(shader, shadow);
 
 			CVertexBuffer* lastVB = nullptr;
 			for (auto itDecal = itTechBegin; itDecal != itTechEnd; ++itDecal)
 			{
 				SDecalBatch& batch = *itDecal;
 				CDecalRData* decal = batch.decal;
 				CMaterial& material = decal->m_Decal->m_Decal.m_Material;
 
 				const CMaterial::SamplersVector& samplers = material.GetSamplers();
 				for (const CMaterial::TextureSampler& sampler : samplers)
 					sampler.Sampler->UploadBackendTextureIfNeeded(deviceCommandContext);
 				for (const CMaterial::TextureSampler& sampler : samplers)
 					shader->BindTexture(sampler.Name, sampler.Sampler->GetBackendTexture());
 
 				material.GetStaticUniforms().BindUniforms(shader);
 
 				// TODO: Need to handle floating decals correctly. In particular, we need
 				// to render non-floating before water and floating after water (to get
 				// the blending right), and we also need to apply the correct lighting in
 				// each case, which doesn't really seem possible with the current
 				// TerrainRenderer.
 				// Also, need to mark the decals as dirty when water height changes.
 
 				//	m_Decal->GetBounds().Render();
 
 				shader->Uniform(str_shadingColor, decal->m_Decal->GetShadingColor());
 
 				if (lastVB != batch.vertices->m_Owner)
 				{
 					lastVB = batch.vertices->m_Owner;
 					const GLsizei stride = sizeof(SDecalVertex);
 					SDecalVertex* base = (SDecalVertex*)batch.vertices->m_Owner->Bind(deviceCommandContext);
 
 					shader->VertexPointer(3, GL_FLOAT, stride, &base->m_Position[0]);
 					shader->NormalPointer(GL_FLOAT, stride, &base->m_Normal[0]);
 					shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, stride, &base->m_UV[0]);
 				}
 
 				shader->AssertPointersBound();
 
 				if (lastIB != batch.indices->m_Owner)
 				{
 					lastIB = batch.indices->m_Owner;
-					batch.indices->m_Owner->Bind(deviceCommandContext);
+					batch.indices->m_Owner->UploadIfNeeded(deviceCommandContext); // Presumably flushes pending index data before use - TODO confirm.
+					deviceCommandContext->SetIndexBuffer(batch.indices->m_Owner->GetBuffer()); // Takes over from the former Bind() call.
 				}
 
-				u8* indexBase = nullptr;
-				glDrawElements(GL_TRIANGLES, batch.indices->m_Count, GL_UNSIGNED_SHORT, indexBase + sizeof(u16) * (batch.indices->m_Index));
+				deviceCommandContext->DrawIndexed(batch.indices->m_Index, batch.indices->m_Count, 0); // Start is given in indices; the old glDrawElements call needed a byte offset.
 
 				// bump stats
 				g_Renderer.m_Stats.m_DrawCalls++;
 				g_Renderer.m_Stats.m_TerrainTris += batch.indices->m_Count / 3;
 			}
 
 			techBase->EndPass();
 		}
 	}
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void CDecalRData::BuildVertexData()
 {
 	PROFILE("decal build");
 
 	const SDecal& decal = m_Decal->m_Decal;
 
 	// TODO: Currently this constructs an axis-aligned bounding rectangle around
 	// the decal. It would be more efficient for rendering if we excluded tiles
 	// that are outside the (non-axis-aligned) decal rectangle.
 
 	ssize_t i0, j0, i1, j1;
 	m_Decal->CalcVertexExtents(i0, j0, i1, j1);
 	// Currently CalcVertexExtents might return empty rectangle, that means
 	// we can't render it.
 	if (i1 <= i0 || j1 <= j0)
 	{
 		// We have nothing to render.
 		m_VBDecals.Reset();
 		m_VBDecalsIndices.Reset();
 		return;
 	}
 
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*m_Simulation, SYSTEM_ENTITY);
 
 	std::vector<SDecalVertex> vertices((i1 - i0 + 1) * (j1 - j0 + 1));
 
 	for (ssize_t j = j0, idx = 0; j <= j1; ++j)
 	{
 		for (ssize_t i = i0; i <= i1; ++i, ++idx)
 		{
 			SDecalVertex& vertex = vertices[idx];
 			m_Decal->m_Terrain->CalcPosition(i, j, vertex.m_Position);
 
 			if (decal.m_Floating && cmpWaterManager)
 			{
 				vertex.m_Position.Y = std::max(
 					vertex.m_Position.Y,
 					cmpWaterManager->GetExactWaterLevel(vertex.m_Position.X, vertex.m_Position.Z));
 			}
 
 			m_Decal->m_Terrain->CalcNormal(i, j, vertex.m_Normal);
 
 			// Map from world space back into decal texture space.
 			CVector3D inv = m_Decal->GetInvTransform().Transform(vertex.m_Position);
 			vertex.m_UV.X = 0.5f + (inv.X - decal.m_OffsetX) / decal.m_SizeX;
 			// Flip V to match our texture convention.
 			vertex.m_UV.Y = 0.5f - (inv.Z - decal.m_OffsetZ) / decal.m_SizeZ;
 		}
 	}
 
 	if (!m_VBDecals || m_VBDecals->m_Count != vertices.size())
 	{
 		m_VBDecals = g_VBMan.AllocateChunk(
 			sizeof(SDecalVertex), vertices.size(),
 			Renderer::Backend::GL::CBuffer::Type::VERTEX, false);
 	}
 	m_VBDecals->m_Owner->UpdateChunkVertices(m_VBDecals.Get(), vertices.data());
 
 	std::vector<u16> indices((i1 - i0) * (j1 - j0) * 6);
 
 	const ssize_t w = i1 - i0 + 1;
 	auto itIdx = indices.begin();
 	const size_t base = m_VBDecals->m_Index;
 	for (ssize_t dj = 0; dj < j1 - j0; ++dj)
 	{
 		for (ssize_t di = 0; di < i1 - i0; ++di)
 		{
 			const bool dir = m_Decal->m_Terrain->GetTriangulationDir(i0 + di, j0 + dj);
 			if (dir)
 			{
 				*itIdx++ = u16(((dj + 0) * w + (di + 0)) + base);
 				*itIdx++ = u16(((dj + 0) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 0)) + base);
 
 				*itIdx++ = u16(((dj + 0) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 0)) + base);
 			}
 			else
 			{
 				*itIdx++ = u16(((dj + 0) * w + (di + 0)) + base);
 				*itIdx++ = u16(((dj + 0) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 1)) + base);
 
 				*itIdx++ = u16(((dj + 1) * w + (di + 1)) + base);
 				*itIdx++ = u16(((dj + 1) * w + (di + 0)) + base);
 				*itIdx++ = u16(((dj + 0) * w + (di + 0)) + base);
 			}
 		}
 	}
 
 	// Construct vertex buffer.
 	if (!m_VBDecalsIndices || m_VBDecalsIndices->m_Count != indices.size())
 	{
 		m_VBDecalsIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), indices.size(),
 			Renderer::Backend::GL::CBuffer::Type::INDEX, false);
 	}
 	m_VBDecalsIndices->m_Owner->UpdateChunkVertices(m_VBDecalsIndices.Get(), indices.data());
 }
Index: ps/trunk/source/renderer/HWLightingModelRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/HWLightingModelRenderer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/HWLightingModelRenderer.cpp	(revision 26525)
@@ -1,249 +1,245 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/HWLightingModelRenderer.h"
 
 #include "graphics/Color.h"
 #include "graphics/LightEnv.h"
 #include "graphics/Model.h"
 #include "graphics/ModelDef.h"
 #include "graphics/ShaderProgram.h"
 #include "lib/bits.h"
 #include "lib/ogl.h"
 #include "lib/sysdep/rtl.h"
 #include "maths/Vector3D.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderModifiers.h"
 #include "renderer/VertexArray.h"
 
 
 struct ShaderModelDef : public CModelDefRPrivate
 {
 	/// Indices are the same for all models, so share them
 	VertexIndexArray m_IndexArray;
 
 	/// Static per-CModelDef vertex array
 	VertexArray m_Array;
 
 	/// The number of UVs is determined by the model
 	std::vector<VertexArray::Attribute> m_UVs;
 
 	ShaderModelDef(const CModelDefPtr& mdef);
 };
 
 
 ShaderModelDef::ShaderModelDef(const CModelDefPtr& mdef)
 	: m_IndexArray(false),
 	m_Array(Renderer::Backend::GL::CBuffer::Type::VERTEX, false)
 {
 	size_t numVertices = mdef->GetNumVertices();
 
 	m_UVs.resize(mdef->GetNumUVsPerVertex());
 	for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); ++i)
 	{
 		m_UVs[i].type = GL_FLOAT;
 		m_UVs[i].elems = 2;
 		m_Array.AddAttribute(&m_UVs[i]);
 	}
 
 	m_Array.SetNumberOfVertices(numVertices);
 	m_Array.Layout();
 
 	for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); ++i)
 	{
 		VertexArrayIterator<float[2]> UVit = m_UVs[i].GetIterator<float[2]>();
 		ModelRenderer::BuildUV(mdef, UVit, i);
 	}
 
 	m_Array.Upload();
 	m_Array.FreeBackingStore();
 
 	m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces()*3);
 	m_IndexArray.Layout();
 	ModelRenderer::BuildIndices(mdef, m_IndexArray.GetIterator());
 	m_IndexArray.Upload();
 	m_IndexArray.FreeBackingStore();
 }
 
 
 struct ShaderModel : public CModelRData
 {
 	/// Dynamic per-CModel vertex array
 	VertexArray m_Array;
 
 	/// Position and normals/lighting are recalculated on CPU every frame
 	VertexArray::Attribute m_Position;
 	VertexArray::Attribute m_Normal;
 
 	ShaderModel(const void* key)
 		: CModelRData(key),
 		m_Array(Renderer::Backend::GL::CBuffer::Type::VERTEX, true)
 	{}
 };
 
 
 struct ShaderModelVertexRenderer::ShaderModelRendererInternals
 {
 	/// Previously prepared modeldef
 	ShaderModelDef* shadermodeldef;
 };
 
 
 // Construction and Destruction
 ShaderModelVertexRenderer::ShaderModelVertexRenderer()
 {
 	m = new ShaderModelRendererInternals;
 	m->shadermodeldef = nullptr;
 }
 
 ShaderModelVertexRenderer::~ShaderModelVertexRenderer()
 {
 	delete m;
 }
 
 
 // Build model data (and modeldef data if necessary)
 CModelRData* ShaderModelVertexRenderer::CreateModelData(const void* key, CModel* model)
 {
 	CModelDefPtr mdef = model->GetModelDef();
 	ShaderModelDef* shadermodeldef = (ShaderModelDef*)mdef->GetRenderData(m);
 
 	if (!shadermodeldef)
 	{
 		shadermodeldef = new ShaderModelDef(mdef);
 		mdef->SetRenderData(m, shadermodeldef);
 	}
 
 	// Build the per-model data
 	ShaderModel* shadermodel = new ShaderModel(key);
 
 	// Positions and normals must be 16-byte aligned for SSE writes.
 
 	shadermodel->m_Position.type = GL_FLOAT;
 	shadermodel->m_Position.elems = 4;
 	shadermodel->m_Array.AddAttribute(&shadermodel->m_Position);
 
 	shadermodel->m_Normal.type = GL_FLOAT;
 	shadermodel->m_Normal.elems = 4;
 	shadermodel->m_Array.AddAttribute(&shadermodel->m_Normal);
 
 	shadermodel->m_Array.SetNumberOfVertices(mdef->GetNumVertices());
 	shadermodel->m_Array.Layout();
 
 	// Verify alignment
 	ENSURE(shadermodel->m_Position.offset % 16 == 0);
 	ENSURE(shadermodel->m_Normal.offset % 16 == 0);
 	ENSURE(shadermodel->m_Array.GetStride() % 16 == 0);
 
 	return shadermodel;
 }
 
 
 // Fill in and upload dynamic vertex array
 void ShaderModelVertexRenderer::UpdateModelData(CModel* model, CModelRData* data, int updateflags)
 {
 	ShaderModel* shadermodel = static_cast<ShaderModel*>(data);
 
 	if (updateflags & RENDERDATA_UPDATE_VERTICES)
 	{
 		// build vertices
 		VertexArrayIterator<CVector3D> Position = shadermodel->m_Position.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector3D> Normal = shadermodel->m_Normal.GetIterator<CVector3D>();
 
 		ModelRenderer::BuildPositionAndNormals(model, Position, Normal);
 
 		// upload everything to vertex buffer
 		shadermodel->m_Array.Upload();
 	}
 
 	shadermodel->m_Array.PrepareForRendering();
 }
 
 
 // Setup one rendering pass
 void ShaderModelVertexRenderer::BeginPass(int streamflags)
 {
 	ENSURE(streamflags == (streamflags & (STREAM_POS | STREAM_UV0 | STREAM_UV1 | STREAM_NORMAL)));
 }
 
 // Cleanup one rendering pass
 void ShaderModelVertexRenderer::EndPass(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, int UNUSED(streamflags))
 {
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 
 // Prepare UV coordinates for this modeldef
 void ShaderModelVertexRenderer::PrepareModelDef(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, int streamflags, const CModelDef& def)
 {
 	m->shadermodeldef = (ShaderModelDef*)def.GetRenderData(m);
 
 	ENSURE(m->shadermodeldef);
 
 	u8* base = m->shadermodeldef->m_Array.Bind(deviceCommandContext);
 	GLsizei stride = (GLsizei)m->shadermodeldef->m_Array.GetStride();
 
 	if (streamflags & STREAM_UV0)
 		shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, stride, base + m->shadermodeldef->m_UVs[0].offset);
 
 	if ((streamflags & STREAM_UV1) && def.GetNumUVsPerVertex() >= 2)
 		shader->TexCoordPointer(GL_TEXTURE1, 2, GL_FLOAT, stride, base + m->shadermodeldef->m_UVs[1].offset);
 }
 
 
 // Render one model
 void ShaderModelVertexRenderer::RenderModel(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, int streamflags, CModel* model, CModelRData* data)
 {
 	const CModelDefPtr& mdldef = model->GetModelDef();
 	ShaderModel* shadermodel = static_cast<ShaderModel*>(data);
 
 	u8* base = shadermodel->m_Array.Bind(deviceCommandContext);
 	GLsizei stride = (GLsizei)shadermodel->m_Array.GetStride();
 
-	u8* indexBase = m->shadermodeldef->m_IndexArray.Bind(deviceCommandContext);
+	m->shadermodeldef->m_IndexArray.UploadIfNeeded(deviceCommandContext); // Presumably flushes pending index data - TODO confirm.
+	deviceCommandContext->SetIndexBuffer(m->shadermodeldef->m_IndexArray.GetBuffer()); // Takes over from the former Bind() call.
 
 	if (streamflags & STREAM_POS)
 		shader->VertexPointer(3, GL_FLOAT, stride, base + shadermodel->m_Position.offset);
 
 	if (streamflags & STREAM_NORMAL)
 		shader->NormalPointer(GL_FLOAT, stride, base + shadermodel->m_Normal.offset);
 
 	shader->AssertPointersBound();
 
-	// render the lot
-	size_t numFaces = mdldef->GetNumFaces();
+	// Render the lot: three indices per face.
+	const size_t numberOfFaces = mdldef->GetNumFaces();
 
-	// Draw with DrawRangeElements where available, since it might be more efficient
-#if CONFIG2_GLES
-	glDrawElements(GL_TRIANGLES, (GLsizei)numFaces*3, GL_UNSIGNED_SHORT, indexBase);
-#else
-	glDrawRangeElementsEXT(GL_TRIANGLES, 0, (GLuint)mdldef->GetNumVertices()-1,
-		(GLsizei)numFaces*3, GL_UNSIGNED_SHORT, indexBase);
-#endif
+	deviceCommandContext->DrawIndexedInRange(
+		m->shadermodeldef->m_IndexArray.GetOffset(), numberOfFaces * 3, 0, mdldef->GetNumVertices() - 1); // Same [0, vertex count - 1] range the old glDrawRangeElementsEXT call passed.
 
-	// bump stats
+	// Bump stats: one draw call, one triangle per face.
 	g_Renderer.m_Stats.m_DrawCalls++;
-	g_Renderer.m_Stats.m_ModelTris += numFaces;
+	g_Renderer.m_Stats.m_ModelTris += numberOfFaces;
 }
 
Index: ps/trunk/source/renderer/InstancingModelRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/InstancingModelRenderer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/InstancingModelRenderer.cpp	(revision 26525)
@@ -1,395 +1,391 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 #include "renderer/InstancingModelRenderer.h"
 
 #include "graphics/Color.h"
 #include "graphics/LightEnv.h"
 #include "graphics/Model.h"
 #include "graphics/ModelDef.h"
 #include "lib/ogl.h"
 #include "maths/Vector3D.h"
 #include "maths/Vector4D.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderModifiers.h"
 #include "renderer/VertexArray.h"
 #include "third_party/mikktspace/weldmesh.h"
 
 
 struct IModelDef : public CModelDefRPrivate
 {
 	/// Static per-CModel vertex array
 	VertexArray m_Array;
 
 	/// Position and normals are static
 	VertexArray::Attribute m_Position;
 	VertexArray::Attribute m_Normal;
 	VertexArray::Attribute m_Tangent;
 	VertexArray::Attribute m_BlendJoints; // valid iff gpuSkinning == true
 	VertexArray::Attribute m_BlendWeights; // valid iff gpuSkinning == true
 
 	/// The number of UVs is determined by the model
 	std::vector<VertexArray::Attribute> m_UVs;
 
 	/// Indices are the same for all models, so share them
 	VertexIndexArray m_IndexArray;
 
 	IModelDef(const CModelDefPtr& mdef, bool gpuSkinning, bool calculateTangents);
 };
 
 
 IModelDef::IModelDef(const CModelDefPtr& mdef, bool gpuSkinning, bool calculateTangents)
 	: m_IndexArray(false), m_Array(Renderer::Backend::GL::CBuffer::Type::VERTEX, false)
 {
 	size_t numVertices = mdef->GetNumVertices();
 
 	m_Position.type = GL_FLOAT;
 	m_Position.elems = 3;
 	m_Array.AddAttribute(&m_Position);
 
 	m_Normal.type = GL_FLOAT;
 	m_Normal.elems = 3;
 	m_Array.AddAttribute(&m_Normal);
 
 	m_UVs.resize(mdef->GetNumUVsPerVertex());
 	for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); i++)
 	{
 		m_UVs[i].type = GL_FLOAT;
 		m_UVs[i].elems = 2;
 		m_Array.AddAttribute(&m_UVs[i]);
 	}
 
 	if (gpuSkinning)
 	{
 		// We can't use a lot of bones because it costs uniform memory. Recommended
 		// number of bones per model is 32.
 		// Add 1 to NumBones because of the special 'root' bone.
 		if (mdef->GetNumBones() + 1 > 64)
 			LOGERROR("Model '%s' has too many bones %zu/64", mdef->GetName().string8().c_str(), mdef->GetNumBones() + 1);
 		ENSURE(mdef->GetNumBones() + 1 <= 64);
 
 		m_BlendJoints.type = GL_UNSIGNED_BYTE;
 		m_BlendJoints.elems = 4;
 		m_Array.AddAttribute(&m_BlendJoints);
 
 		m_BlendWeights.type = GL_UNSIGNED_BYTE;
 		m_BlendWeights.elems = 4;
 		m_Array.AddAttribute(&m_BlendWeights);
 	}
 
 	if (calculateTangents)
 	{
 		// Generate tangents for the geometry:-
 
 		m_Tangent.type = GL_FLOAT;
 		m_Tangent.elems = 4;
 		m_Array.AddAttribute(&m_Tangent);
 
 		// floats per vertex; position + normal + tangent + UV*sets [+ GPUskinning]
 		int numVertexAttrs = 3 + 3 + 4 + 2 * mdef->GetNumUVsPerVertex();
 		if (gpuSkinning)
 		{
 			numVertexAttrs += 8;
 		}
 
 		// the tangent generation can increase the number of vertices temporarily
 		// so reserve a bit more memory to avoid reallocations in GenTangents (in most cases)
 		std::vector<float> newVertices;
 		newVertices.reserve(numVertexAttrs * numVertices * 2);
 
 		// Generate the tangents
 		ModelRenderer::GenTangents(mdef, newVertices, gpuSkinning);
 
 		// how many vertices do we have after generating tangents?
 		int newNumVert = newVertices.size() / numVertexAttrs;
 
 		std::vector<int> remapTable(newNumVert);
 		std::vector<float> vertexDataOut(newNumVert * numVertexAttrs);
 
 		// re-weld the mesh to remove duplicated vertices
 		int numVertices2 = WeldMesh(&remapTable[0], &vertexDataOut[0],
 					&newVertices[0], newNumVert, numVertexAttrs);
 
 		// Copy the model data to graphics memory:-
 
 		m_Array.SetNumberOfVertices(numVertices2);
 		m_Array.Layout();
 
 		VertexArrayIterator<CVector3D> Position = m_Position.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector3D> Normal = m_Normal.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector4D> Tangent = m_Tangent.GetIterator<CVector4D>();
 
 		VertexArrayIterator<u8[4]> BlendJoints;
 		VertexArrayIterator<u8[4]> BlendWeights;
 		if (gpuSkinning)
 		{
 			BlendJoints = m_BlendJoints.GetIterator<u8[4]>();
 			BlendWeights = m_BlendWeights.GetIterator<u8[4]>();
 		}
 
 		// copy everything into the vertex array
 		for (int i = 0; i < numVertices2; i++)
 		{
 			int q = numVertexAttrs * i;
 
 			Position[i] = CVector3D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2]);
 			q += 3;
 
 			Normal[i] = CVector3D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2]);
 			q += 3;
 
 			Tangent[i] = CVector4D(vertexDataOut[q + 0], vertexDataOut[q + 1], vertexDataOut[q + 2],
 					vertexDataOut[q + 3]);
 			q += 4;
 
 			if (gpuSkinning)
 			{
 				for (size_t j = 0; j < 4; ++j)
 				{
 					BlendJoints[i][j] = (u8)vertexDataOut[q + 0 + 2 * j];
 					BlendWeights[i][j] = (u8)vertexDataOut[q + 1 + 2 * j];
 				}
 				q += 8;
 			}
 
 			for (size_t j = 0; j < mdef->GetNumUVsPerVertex(); j++)
 			{
 				VertexArrayIterator<float[2]> UVit = m_UVs[j].GetIterator<float[2]>();
 				UVit[i][0] = vertexDataOut[q + 0 + 2 * j];
 				UVit[i][1] = vertexDataOut[q + 1 + 2 * j];
 			}
 		}
 
 		// upload vertex data
 		m_Array.Upload();
 		m_Array.FreeBackingStore();
 
 		m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces() * 3);
 		m_IndexArray.Layout();
 
 		VertexArrayIterator<u16> Indices = m_IndexArray.GetIterator();
 
 		size_t idxidx = 0;
 
 		// reindex geometry and upload index
 		for (size_t j = 0; j < mdef->GetNumFaces(); ++j)
 		{
 			Indices[idxidx++] = remapTable[j * 3 + 0];
 			Indices[idxidx++] = remapTable[j * 3 + 1];
 			Indices[idxidx++] = remapTable[j * 3 + 2];
 		}
 
 		m_IndexArray.Upload();
 		m_IndexArray.FreeBackingStore();
 	}
 	else
 	{
 		// Upload model without calculating tangents:-
 
 		m_Array.SetNumberOfVertices(numVertices);
 		m_Array.Layout();
 
 		VertexArrayIterator<CVector3D> Position = m_Position.GetIterator<CVector3D>();
 		VertexArrayIterator<CVector3D> Normal = m_Normal.GetIterator<CVector3D>();
 
 		ModelRenderer::CopyPositionAndNormals(mdef, Position, Normal);
 
 		for (size_t i = 0; i < mdef->GetNumUVsPerVertex(); i++)
 		{
 			VertexArrayIterator<float[2]> UVit = m_UVs[i].GetIterator<float[2]>();
 			ModelRenderer::BuildUV(mdef, UVit, i);
 		}
 
 		if (gpuSkinning)
 		{
 			VertexArrayIterator<u8[4]> BlendJoints = m_BlendJoints.GetIterator<u8[4]>();
 			VertexArrayIterator<u8[4]> BlendWeights = m_BlendWeights.GetIterator<u8[4]>();
 			for (size_t i = 0; i < numVertices; ++i)
 			{
 				const SModelVertex& vtx = mdef->GetVertices()[i];
 				for (size_t j = 0; j < 4; ++j)
 				{
 					BlendJoints[i][j] = vtx.m_Blend.m_Bone[j];
 					BlendWeights[i][j] = (u8)(255.f * vtx.m_Blend.m_Weight[j]);
 				}
 			}
 		}
 
 		m_Array.Upload();
 		m_Array.FreeBackingStore();
 
 		m_IndexArray.SetNumberOfVertices(mdef->GetNumFaces()*3);
 		m_IndexArray.Layout();
 		ModelRenderer::BuildIndices(mdef, m_IndexArray.GetIterator());
 		m_IndexArray.Upload();
 		m_IndexArray.FreeBackingStore();
 	}
 }
 
 
 struct InstancingModelRendererInternals
 {
 	bool gpuSkinning;
 
 	bool calculateTangents;
 
 	/// Previously prepared modeldef
 	IModelDef* imodeldef;
 
 	/// Index base for imodeldef
 	u8* imodeldefIndexBase;
 };
 
 
 // Construction and Destruction
 InstancingModelRenderer::InstancingModelRenderer(bool gpuSkinning, bool calculateTangents)
 {
 	m = new InstancingModelRendererInternals;
 	m->gpuSkinning = gpuSkinning;
 	m->calculateTangents = calculateTangents;
 	m->imodeldef = 0;
 }
 
 InstancingModelRenderer::~InstancingModelRenderer()
 {
 	delete m;
 }
 
 
 // Build modeldef data if necessary - we have no per-CModel data
 CModelRData* InstancingModelRenderer::CreateModelData(const void* key, CModel* model)
 {
 	CModelDefPtr mdef = model->GetModelDef();
 	IModelDef* imodeldef = (IModelDef*)mdef->GetRenderData(m);
 
 	if (m->gpuSkinning)
  		ENSURE(model->IsSkinned());
 	else
 		ENSURE(!model->IsSkinned());
 
 	if (!imodeldef)
 	{
 		imodeldef = new IModelDef(mdef, m->gpuSkinning, m->calculateTangents);
 		mdef->SetRenderData(m, imodeldef);
 	}
 
 	return new CModelRData(key);
 }
 
 
 void InstancingModelRenderer::UpdateModelData(CModel* UNUSED(model), CModelRData* UNUSED(data), int UNUSED(updateflags))
 {
 	// We have no per-CModel data
 }
 
 
 // Setup one rendering pass.
 void InstancingModelRenderer::BeginPass(int streamflags)
 {
 	ENSURE(streamflags == (streamflags & (STREAM_POS|STREAM_NORMAL|STREAM_UV0|STREAM_UV1)));
 }
 
 // Cleanup rendering pass.
 void InstancingModelRenderer::EndPass(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	int UNUSED(streamflags))
 {
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 
 // Prepare UV coordinates for this modeldef
 void InstancingModelRenderer::PrepareModelDef(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, int streamflags, const CModelDef& def)
 {
 	m->imodeldef = (IModelDef*)def.GetRenderData(m);
 
 	ENSURE(m->imodeldef);
 
 	u8* base = m->imodeldef->m_Array.Bind(deviceCommandContext);
 	GLsizei stride = (GLsizei)m->imodeldef->m_Array.GetStride();
 
-	m->imodeldefIndexBase = m->imodeldef->m_IndexArray.Bind(deviceCommandContext);
+	m->imodeldef->m_IndexArray.UploadIfNeeded(deviceCommandContext); // Presumably flushes pending index data - TODO confirm.
+	deviceCommandContext->SetIndexBuffer(m->imodeldef->m_IndexArray.GetBuffer()); // Takes over from the former Bind() call; no index base pointer is cached any more.
 
 	if (streamflags & STREAM_POS)
 		shader->VertexPointer(3, GL_FLOAT, stride, base + m->imodeldef->m_Position.offset);
 
 	if (streamflags & STREAM_NORMAL)
 		shader->NormalPointer(GL_FLOAT, stride, base + m->imodeldef->m_Normal.offset);
 
 	if (m->calculateTangents)
 		shader->VertexAttribPointer(str_a_tangent, 4, GL_FLOAT, GL_FALSE, stride, base + m->imodeldef->m_Tangent.offset);
 
 	// The last UV set is STREAM_UV3
 	for (size_t uv = 0; uv < 4; ++uv)
 		if (streamflags & (STREAM_UV0 << uv))
 		{
 			if (def.GetNumUVsPerVertex() >= uv + 1)
 				shader->TexCoordPointer(GL_TEXTURE0 + uv, 2, GL_FLOAT, stride, base + m->imodeldef->m_UVs[uv].offset);
 			else
 				ONCE(LOGERROR("Model '%s' has no UV%d set.", def.GetName().string8().c_str(), uv));
 		}
 
 	// GPU skinning requires extra attributes to compute positions/normals
 	if (m->gpuSkinning)
 	{
 		shader->VertexAttribPointer(str_a_skinJoints, 4, GL_UNSIGNED_BYTE, GL_FALSE, stride, base + m->imodeldef->m_BlendJoints.offset);
 		shader->VertexAttribPointer(str_a_skinWeights, 4, GL_UNSIGNED_BYTE, GL_TRUE, stride, base + m->imodeldef->m_BlendWeights.offset);
 	}
 
 	shader->AssertPointersBound();
 }
 
 
 // Render one model
 void InstancingModelRenderer::RenderModel(
-	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext),
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, int UNUSED(streamflags), CModel* model, CModelRData* UNUSED(data))
 {
 	const CModelDefPtr& mdldef = model->GetModelDef();
 
 	if (m->gpuSkinning)
 	{
 		// Bind matrices for current animation state.
 		// Add 1 to NumBones because of the special 'root' bone.
 		// HACK: NVIDIA drivers return uniform name with "[0]", Intel Windows drivers without;
 		// try uploading both names since one of them should work, and this is easier than
 		// canonicalising the uniform names in CShaderProgramGLSL
 		shader->Uniform(str_skinBlendMatrices_0, mdldef->GetNumBones() + 1, model->GetAnimatedBoneMatrices());
 		shader->Uniform(str_skinBlendMatrices, mdldef->GetNumBones() + 1, model->GetAnimatedBoneMatrices());
 	}
 
-	// render the lot
-	size_t numFaces = mdldef->GetNumFaces();
+	// Render the lot: three indices per face.
+	const size_t numberOfFaces = mdldef->GetNumFaces();
 
-	// Draw with DrawRangeElements where available, since it might be more efficient
-#if CONFIG2_GLES
-	glDrawElements(GL_TRIANGLES, (GLsizei)numFaces*3, GL_UNSIGNED_SHORT, m->imodeldefIndexBase);
-#else
-	glDrawRangeElementsEXT(GL_TRIANGLES, 0, (GLuint)m->imodeldef->m_Array.GetNumberOfVertices()-1,
-			(GLsizei)numFaces*3, GL_UNSIGNED_SHORT, m->imodeldefIndexBase);
-#endif
+	deviceCommandContext->DrawIndexedInRange(
+		m->imodeldef->m_IndexArray.GetOffset(), numberOfFaces * 3, 0, m->imodeldef->m_Array.GetNumberOfVertices() - 1); // Same [0, vertex count - 1] range the old glDrawRangeElementsEXT call passed.
 
-	// bump stats
+	// Bump stats: one draw call, one triangle per face.
 	g_Renderer.m_Stats.m_DrawCalls++;
-	g_Renderer.m_Stats.m_ModelTris += numFaces;
+	g_Renderer.m_Stats.m_ModelTris += numberOfFaces;
 
 }
Index: ps/trunk/source/renderer/OverlayRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/OverlayRenderer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/OverlayRenderer.cpp	(revision 26525)
@@ -1,799 +1,801 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "OverlayRenderer.h"
 
 #include "graphics/Camera.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/Overlay.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "lib/hash.h"
 #include "lib/ogl.h"
 #include "maths/MathUtil.h"
 #include "maths/Quaternion.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/TexturedLineRData.h"
 #include "renderer/VertexArray.h"
 #include "renderer/VertexBuffer.h"
 #include "renderer/VertexBufferManager.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/system/SimContext.h"
 
 #include <unordered_map>
 
 namespace
 {
 
 CShaderTechniquePtr GetOverlayLineShaderTechnique(const CShaderDefines& defines)
 {
 	return g_Renderer.GetShaderManager().LoadEffect(str_overlay_line, defines);
 }
 
 } // anonymous namespace
 
 /**
  * Key used to group quads into batches for more efficient rendering. Currently groups by the combination
  * of the main texture and the texture mask, to minimize texture swapping during rendering.
  */
 struct QuadBatchKey
 {
 	QuadBatchKey (const CTexturePtr& texture, const CTexturePtr& textureMask)
 		: m_Texture(texture), m_TextureMask(textureMask)
 	{ }
 
 	bool operator==(const QuadBatchKey& other) const
 	{
 		return (m_Texture == other.m_Texture && m_TextureMask == other.m_TextureMask);
 	}
 
 	CTexturePtr m_Texture;
 	CTexturePtr m_TextureMask;
 };
 
 struct QuadBatchHash
 {
 	std::size_t operator()(const QuadBatchKey& d) const
 	{
 		size_t seed = 0;
 		hash_combine(seed, d.m_Texture);
 		hash_combine(seed, d.m_TextureMask);
 		return seed;
 	}
 };
 
 /**
  * Holds information about a single quad rendering batch.
  */
 class QuadBatchData : public CRenderData
 {
 public:
 	QuadBatchData() : m_IndicesBase(0), m_NumRenderQuads(0) { }
 
 	/// Holds the quad overlay structures requested to be rendered in this batch. Must be cleared
 	/// after each frame.
 	std::vector<SOverlayQuad*> m_Quads;
 
 	/// Start index of this batch into the dedicated quad indices VertexArray (see OverlayInternals).
 	size_t m_IndicesBase;
 	/// Amount of quads to actually render in this batch. Potentially (although unlikely to be)
 	/// different from m_Quads.size() due to restrictions on the total amount of quads that can be
 	/// rendered. Must be reset after each frame.
 	size_t m_NumRenderQuads;
 };
 
 struct OverlayRendererInternals
 {
 	using QuadBatchMap = std::unordered_map<QuadBatchKey, QuadBatchData, QuadBatchHash>;
 
 	OverlayRendererInternals();
 	~OverlayRendererInternals(){ }
 
 	std::vector<SOverlayLine*> lines;
 	std::vector<SOverlayTexturedLine*> texlines;
 	std::vector<SOverlaySprite*> sprites;
 	std::vector<SOverlayQuad*> quads;
 	std::vector<SOverlaySphere*> spheres;
 
 	QuadBatchMap quadBatchMap;
 
 	// Dedicated vertex/index buffers for rendering all quads (to within the limits set by
 	// MAX_QUAD_OVERLAYS).
 	VertexArray quadVertices;
 	VertexArray::Attribute quadAttributePos;
 	VertexArray::Attribute quadAttributeColor;
 	VertexArray::Attribute quadAttributeUV;
 	VertexIndexArray quadIndices;
 
 	/// Maximum amount of quad overlays we support for rendering. This limit is set to be able to
 	/// render all quads from a single dedicated VB without having to reallocate it, which is much
 	/// faster in the typical case of rendering only a handful of quads. When modifying this value,
 	/// you must take care for the new amount of quads to fit in a single VBO (which is not likely
 	/// to be a problem).
 	static const size_t MAX_QUAD_OVERLAYS = 1024;
 
 	// Sets of commonly-(re)used shader defines.
 	CShaderDefines defsOverlayLineNormal;
 	CShaderDefines defsOverlayLineAlwaysVisible;
 	CShaderDefines defsQuadOverlay;
 
 	// Geometry for a unit sphere
 	std::vector<float> sphereVertexes;
 	std::vector<u16> sphereIndexes;
 	void GenerateSphere();
 
 	/// Performs one-time setup. Called from CRenderer::Open, after graphics capabilities have
 	/// been detected. Note that no VBOs must be created before this is called, since the shader
 	/// path and graphics capabilities are not guaranteed to be stable before this point.
 	void Initialize();
 };
 
 const float OverlayRenderer::OVERLAY_VOFFSET = 0.2f;
 
 OverlayRendererInternals::OverlayRendererInternals()
 	: quadVertices(Renderer::Backend::GL::CBuffer::Type::VERTEX, true),
 	quadIndices(false)
 {
 	quadAttributePos.elems = 3;
 	quadAttributePos.type = GL_FLOAT;
 	quadVertices.AddAttribute(&quadAttributePos);
 
 	quadAttributeColor.elems = 4;
 	quadAttributeColor.type = GL_FLOAT;
 	quadVertices.AddAttribute(&quadAttributeColor);
 
 	quadAttributeUV.elems = 2;
 	quadAttributeUV.type = GL_SHORT; // don't use GL_UNSIGNED_SHORT here, TexCoordPointer won't accept it
 	quadVertices.AddAttribute(&quadAttributeUV);
 
 	// Note that we're reusing the textured overlay line shader for the quad overlay rendering. This
 	// is because their code is almost identical; the only difference is that for the quad overlays
 	// we want to use a vertex color stream as opposed to an objectColor uniform. To this end, the
 	// shader has been set up to switch between the two behaviours based on the USE_OBJECTCOLOR define.
 	defsOverlayLineNormal.Add(str_USE_OBJECTCOLOR, str_1);
 	defsOverlayLineAlwaysVisible.Add(str_USE_OBJECTCOLOR, str_1);
 	defsOverlayLineAlwaysVisible.Add(str_IGNORE_LOS, str_1);
 }
 
 void OverlayRendererInternals::Initialize()
 {
 	// Perform any initialization after graphics capabilities have been detected. Notably,
 	// only at this point can we safely allocate VBOs (in contrast to e.g. in the constructor),
 	// because their creation depends on the shader path, which is not reliably set before this point.
 
 	quadVertices.SetNumberOfVertices(MAX_QUAD_OVERLAYS * 4);
 	quadVertices.Layout(); // allocate backing store
 
 	quadIndices.SetNumberOfVertices(MAX_QUAD_OVERLAYS * 6);
 	quadIndices.Layout(); // allocate backing store
 
 	// Since the quads in the vertex array are independent and always consist of exactly 4 vertices per quad, the
 	// indices are always the same; we can therefore fill in all the indices once and pretty much forget about
 	// them. We then also no longer need its backing store, since we never change any indices afterwards.
 	VertexArrayIterator<u16> index = quadIndices.GetIterator();
 	for (u16 i = 0; i < static_cast<u16>(MAX_QUAD_OVERLAYS); ++i)
 	{
 		*index++ = i * 4 + 0;
 		*index++ = i * 4 + 1;
 		*index++ = i * 4 + 2;
 		*index++ = i * 4 + 2;
 		*index++ = i * 4 + 3;
 		*index++ = i * 4 + 0;
 	}
 	quadIndices.Upload();
 	quadIndices.FreeBackingStore();
 }
 
 OverlayRenderer::OverlayRenderer()
 {
 	m = new OverlayRendererInternals();
 }
 
 OverlayRenderer::~OverlayRenderer()
 {
 	delete m;
 }
 
 void OverlayRenderer::Initialize()
 {
 	m->Initialize();
 }
 
 void OverlayRenderer::Submit(SOverlayLine* line)
 {
 	m->lines.push_back(line);
 }
 
 void OverlayRenderer::Submit(SOverlayTexturedLine* line)
 {
 	// Simplify the rest of the code by guaranteeing non-empty lines
 	if (line->m_Coords.empty())
 		return;
 
 	m->texlines.push_back(line);
 }
 
 void OverlayRenderer::Submit(SOverlaySprite* overlay)
 {
 	m->sprites.push_back(overlay);
 }
 
 void OverlayRenderer::Submit(SOverlayQuad* overlay)
 {
 	m->quads.push_back(overlay);
 }
 
 void OverlayRenderer::Submit(SOverlaySphere* overlay)
 {
 	m->spheres.push_back(overlay);
 }
 
 void OverlayRenderer::EndFrame()
 {
 	m->lines.clear();
 	m->texlines.clear();
 	m->sprites.clear();
 	m->quads.clear();
 	m->spheres.clear();
 
 	// this should leave the capacity unchanged, which is okay since it
 	// won't be very large or very variable
 
 	// Empty the batch rendering data structures, but keep their key mappings around for the next frames
 	for (OverlayRendererInternals::QuadBatchMap::iterator it = m->quadBatchMap.begin(); it != m->quadBatchMap.end(); ++it)
 	{
 		QuadBatchData& quadBatchData = (it->second);
 		quadBatchData.m_Quads.clear();
 		quadBatchData.m_NumRenderQuads = 0;
 		quadBatchData.m_IndicesBase = 0;
 	}
 }
 
 void OverlayRenderer::PrepareForRendering()
 {
 	PROFILE3("prepare overlays");
 
 	// This is where we should do something like sort the overlays by
 	// color/sprite/etc for more efficient rendering
 
 	for (size_t i = 0; i < m->texlines.size(); ++i)
 	{
 		SOverlayTexturedLine* line = m->texlines[i];
 		if (!line->m_RenderData)
 		{
 			line->m_RenderData = std::make_shared<CTexturedLineRData>();
 			line->m_RenderData->Update(*line);
 			// We assume the overlay line will get replaced by the caller
 			// if terrain changes, so we don't need to detect that here and
 			// call Update again. Also we assume the caller won't change
 			// any of the parameters after first submitting the line.
 		}
 	}
 
 	// Group quad overlays by their texture/mask combination for efficient rendering
 	// TODO: consider doing this directly in Submit()
 	for (size_t i = 0; i < m->quads.size(); ++i)
 	{
 		SOverlayQuad* const quad = m->quads[i];
 
 		QuadBatchKey textures(quad->m_Texture, quad->m_TextureMask);
 		QuadBatchData& batchRenderData = m->quadBatchMap[textures]; // will create entry if it doesn't already exist
 
 		// add overlay to list of quads
 		batchRenderData.m_Quads.push_back(quad);
 	}
 
 	const CVector3D vOffset(0, OverlayRenderer::OVERLAY_VOFFSET, 0);
 
 	// Write quad overlay vertices/indices to VA backing store
 	VertexArrayIterator<CVector3D> vertexPos = m->quadAttributePos.GetIterator<CVector3D>();
 	VertexArrayIterator<CVector4D> vertexColor = m->quadAttributeColor.GetIterator<CVector4D>();
 	VertexArrayIterator<short[2]> vertexUV = m->quadAttributeUV.GetIterator<short[2]>();
 
 	size_t indicesIdx = 0;
 	size_t totalNumQuads = 0;
 
 	for (OverlayRendererInternals::QuadBatchMap::iterator it = m->quadBatchMap.begin(); it != m->quadBatchMap.end(); ++it)
 	{
 		QuadBatchData& batchRenderData = (it->second);
 		batchRenderData.m_NumRenderQuads = 0;
 
 		if (batchRenderData.m_Quads.empty())
 			continue;
 
 		// Remember the current index into the (entire) indices array as our base offset for this batch
 		batchRenderData.m_IndicesBase = indicesIdx;
 
 		// points to the index where each iteration's vertices will be appended
 		for (size_t i = 0; i < batchRenderData.m_Quads.size() && totalNumQuads < OverlayRendererInternals::MAX_QUAD_OVERLAYS; i++)
 		{
 			const SOverlayQuad* quad = batchRenderData.m_Quads[i];
 
 			// TODO: this is kind of ugly, the iterator should use a type that can have quad->m_Color assigned
 			// to it directly
 			const CVector4D quadColor(quad->m_Color.r, quad->m_Color.g, quad->m_Color.b, quad->m_Color.a);
 
 			*vertexPos++ = quad->m_Corners[0] + vOffset;
 			*vertexPos++ = quad->m_Corners[1] + vOffset;
 			*vertexPos++ = quad->m_Corners[2] + vOffset;
 			*vertexPos++ = quad->m_Corners[3] + vOffset;
 
 			(*vertexUV)[0] = 0;
 			(*vertexUV)[1] = 0;
 			++vertexUV;
 			(*vertexUV)[0] = 0;
 			(*vertexUV)[1] = 1;
 			++vertexUV;
 			(*vertexUV)[0] = 1;
 			(*vertexUV)[1] = 1;
 			++vertexUV;
 			(*vertexUV)[0] = 1;
 			(*vertexUV)[1] = 0;
 			++vertexUV;
 
 			*vertexColor++ = quadColor;
 			*vertexColor++ = quadColor;
 			*vertexColor++ = quadColor;
 			*vertexColor++ = quadColor;
 
 			indicesIdx += 6;
 
 			totalNumQuads++;
 			batchRenderData.m_NumRenderQuads++;
 		}
 	}
 
 	m->quadVertices.Upload();
 	// don't free the backing store! we'll overwrite it on the next frame to save a reallocation.
 
 	m->quadVertices.PrepareForRendering();
 }
 
 void OverlayRenderer::RenderOverlaysBeforeWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	PROFILE3_GPU("overlays (before)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render overlays before water");
 
 #if CONFIG2_GLES
 #warning TODO: implement OverlayRenderer::RenderOverlaysBeforeWater for GLES
 #else
 	for (SOverlayLine* line : m->lines)
 	{
 		if (line->m_Coords.empty())
 			continue;
 
 		g_Renderer.GetDebugRenderer().DrawLine(line->m_Coords, line->m_Color, static_cast<float>(line->m_Thickness));
 	}
 #endif
 }
 
 void OverlayRenderer::RenderOverlaysAfterWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	PROFILE3_GPU("overlays (after)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render overlays after water");
 
 	RenderTexturedOverlayLines(deviceCommandContext);
 	RenderQuadOverlays(deviceCommandContext);
 	RenderSphereOverlays(deviceCommandContext);
 }
 
 void OverlayRenderer::RenderTexturedOverlayLines(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 #if CONFIG2_GLES
 #warning TODO: implement OverlayRenderer::RenderTexturedOverlayLines for GLES
 	return;
 #endif
 	if (m->texlines.empty())
 		return;
 
 	ogl_WarnIfError();
 
 	CLOSTexture& los = g_Renderer.GetSceneRenderer().GetScene().GetLOSTexture();
 
 	// ----------------------------------------------------------------------------------------
 
 	CShaderTechniquePtr shaderTechTexLineNormal = GetOverlayLineShaderTechnique(m->defsOverlayLineNormal);
 	if (shaderTechTexLineNormal)
 	{
 		Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 			shaderTechTexLineNormal->GetGraphicsPipelineStateDesc();
 		pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 		pipelineStateDesc.blendState.enabled = true;
 		pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::SRC_ALPHA;
 		pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 		pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 			Renderer::Backend::BlendOp::ADD;
 		if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 			pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 		shaderTechTexLineNormal->BeginPass();
 		deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 		CShaderProgramPtr shaderTexLineNormal = shaderTechTexLineNormal->GetShader();
 
 		shaderTexLineNormal->BindTexture(str_losTex, los.GetTexture());
 		shaderTexLineNormal->Uniform(str_losTransform, los.GetTextureMatrix()[0], los.GetTextureMatrix()[12], 0.f, 0.f);
 
 		shaderTexLineNormal->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 
 		// batch render only the non-always-visible overlay lines using the normal shader
 		RenderTexturedOverlayLines(deviceCommandContext, shaderTexLineNormal, false);
 
 		shaderTechTexLineNormal->EndPass();
 	}
 
 	// ----------------------------------------------------------------------------------------
 
 	CShaderTechniquePtr shaderTechTexLineAlwaysVisible = GetOverlayLineShaderTechnique(m->defsOverlayLineAlwaysVisible);
 	if (shaderTechTexLineAlwaysVisible)
 	{
 		Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 			shaderTechTexLineAlwaysVisible->GetGraphicsPipelineStateDesc();
 		pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 		pipelineStateDesc.blendState.enabled = true;
 		pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::SRC_ALPHA;
 		pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 			Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 		pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 			Renderer::Backend::BlendOp::ADD;
 		if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 			pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 		shaderTechTexLineAlwaysVisible->BeginPass();
 		deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 		CShaderProgramPtr shaderTexLineAlwaysVisible = shaderTechTexLineAlwaysVisible->GetShader();
 
 		// TODO: losTex and losTransform are unused in the always visible shader; see if these can be safely omitted
 		shaderTexLineAlwaysVisible->BindTexture(str_losTex, los.GetTexture());
 		shaderTexLineAlwaysVisible->Uniform(str_losTransform, los.GetTextureMatrix()[0], los.GetTextureMatrix()[12], 0.f, 0.f);
 
 		shaderTexLineAlwaysVisible->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 
 		// batch render only the always-visible overlay lines using the LoS-ignored shader
 		RenderTexturedOverlayLines(deviceCommandContext, shaderTexLineAlwaysVisible, true);
 
 		shaderTechTexLineAlwaysVisible->EndPass();
 	}
 
 	// ----------------------------------------------------------------------------------------
 
 	// TODO: the shaders should probably be responsible for unbinding their textures
 	deviceCommandContext->BindTexture(1, GL_TEXTURE_2D, 0);
 	deviceCommandContext->BindTexture(0, GL_TEXTURE_2D, 0);
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void OverlayRenderer::RenderTexturedOverlayLines(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, bool alwaysVisible)
 {
 	for (size_t i = 0; i < m->texlines.size(); ++i)
 	{
 		SOverlayTexturedLine* line = m->texlines[i];
 
 		// render only those lines matching the requested alwaysVisible status
 		if (!line->m_RenderData || line->m_AlwaysVisible != alwaysVisible)
 			continue;
 
 		ENSURE(line->m_RenderData);
 		line->m_RenderData->Render(deviceCommandContext, *line, shader);
 	}
 }
 
 void OverlayRenderer::RenderQuadOverlays(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 #if CONFIG2_GLES
 #warning TODO: implement OverlayRenderer::RenderQuadOverlays for GLES
 	return;
 #endif
 	if (m->quadBatchMap.empty())
 		return;
 
 	CShaderTechniquePtr shaderTech = GetOverlayLineShaderTechnique(m->defsQuadOverlay);
 
 	if (!shaderTech)
 		return;
 
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		shaderTech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 		pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	shaderTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	const CShaderProgramPtr& shader = shaderTech->GetShader();
 
 	CLOSTexture& los = g_Renderer.GetSceneRenderer().GetScene().GetLOSTexture();
 
 	shader->BindTexture(str_losTex, los.GetTexture());
 	shader->Uniform(str_losTransform, los.GetTextureMatrix()[0], los.GetTextureMatrix()[12], 0.f, 0.f);
 
 	shader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 
 	// Base offsets (in bytes) of the two backing stores relative to their owner VBO
-	u8* indexBase = m->quadIndices.Bind(deviceCommandContext);
+	m->quadIndices.UploadIfNeeded(deviceCommandContext);
 	u8* vertexBase = m->quadVertices.Bind(deviceCommandContext);
-	GLsizei indexStride = m->quadIndices.GetStride();
 	GLsizei vertexStride = m->quadVertices.GetStride();
 
+	deviceCommandContext->SetIndexBuffer(m->quadIndices.GetBuffer());
+
 	for (OverlayRendererInternals::QuadBatchMap::iterator it = m->quadBatchMap.begin(); it != m->quadBatchMap.end(); ++it)
 	{
 		QuadBatchData& batchRenderData = it->second;
 		const size_t batchNumQuads = batchRenderData.m_NumRenderQuads;
 
 		// Careful; some drivers don't like drawing calls with 0 stuff to draw.
 		if (batchNumQuads == 0)
 			continue;
 
 		const QuadBatchKey& maskPair = it->first;
 
 		maskPair.m_Texture->UploadBackendTextureIfNeeded(deviceCommandContext);
 		maskPair.m_TextureMask->UploadBackendTextureIfNeeded(deviceCommandContext);
 		shader->BindTexture(str_baseTex, maskPair.m_Texture->GetBackendTexture());
 		shader->BindTexture(str_maskTex, maskPair.m_TextureMask->GetBackendTexture());
 
 		int streamflags = shader->GetStreamFlags();
 
 		if (streamflags & STREAM_POS)
 			shader->VertexPointer(m->quadAttributePos.elems, m->quadAttributePos.type, vertexStride, vertexBase + m->quadAttributePos.offset);
 
 		if (streamflags & STREAM_UV0)
 			shader->TexCoordPointer(GL_TEXTURE0, m->quadAttributeUV.elems, m->quadAttributeUV.type, vertexStride, vertexBase + m->quadAttributeUV.offset);
 
 		if (streamflags & STREAM_UV1)
 			shader->TexCoordPointer(GL_TEXTURE1, m->quadAttributeUV.elems, m->quadAttributeUV.type, vertexStride, vertexBase + m->quadAttributeUV.offset);
 
 		if (streamflags & STREAM_COLOR)
 			shader->ColorPointer(m->quadAttributeColor.elems, m->quadAttributeColor.type, vertexStride, vertexBase + m->quadAttributeColor.offset);
 
 		shader->AssertPointersBound();
-		glDrawElements(GL_TRIANGLES, (GLsizei)(batchNumQuads * 6), GL_UNSIGNED_SHORT, indexBase + indexStride * batchRenderData.m_IndicesBase);
+		deviceCommandContext->DrawIndexed(m->quadIndices.GetOffset() + batchRenderData.m_IndicesBase, batchNumQuads * 6, 0);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		g_Renderer.GetStats().m_OverlayTris += batchNumQuads*2;
 	}
 
 	shaderTech->EndPass();
 
 	// TODO: the shader should probably be responsible for unbinding its textures
 	deviceCommandContext->BindTexture(1, GL_TEXTURE_2D, 0);
 	deviceCommandContext->BindTexture(0, GL_TEXTURE_2D, 0);
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void OverlayRenderer::RenderForegroundOverlays(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CCamera& viewCamera)
 {
 	PROFILE3_GPU("overlays (fg)");
 
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 	UNUSED2(viewCamera);
 	#warning TODO: implement OverlayRenderer::RenderForegroundOverlays for GLES
 #else
 	CVector3D right = -viewCamera.GetOrientation().GetLeft();
 	CVector3D up = viewCamera.GetOrientation().GetUp();
 
 	CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(str_foreground_overlay);
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		tech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthTestEnabled = false;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	if (g_Renderer.GetSceneRenderer().GetOverlayRenderMode() == WIREFRAME)
 		pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	const CShaderProgramPtr& shader = tech->GetShader();
 
 	shader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 
 	const CVector2D uvs[6] =
 	{
 		{0.0f, 1.0f},
 		{1.0f, 1.0f},
 		{1.0f, 0.0f},
 		{0.0f, 1.0f},
 		{1.0f, 0.0f},
 		{0.0f, 0.0f},
 	};
 
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, sizeof(CVector2D), &uvs[0]);
 
 	for (size_t i = 0; i < m->sprites.size(); ++i)
 	{
 		SOverlaySprite* sprite = m->sprites[i];
 		if (!i || sprite->m_Texture != m->sprites[i - 1]->m_Texture)
 		{
 			sprite->m_Texture->UploadBackendTextureIfNeeded(deviceCommandContext);
 			shader->BindTexture(str_baseTex, sprite->m_Texture->GetBackendTexture());
 		}
 
 		shader->Uniform(str_colorMul, sprite->m_Color);
 
 		const CVector3D position[6] =
 		{
 			sprite->m_Position + right*sprite->m_X0 + up*sprite->m_Y0,
 			sprite->m_Position + right*sprite->m_X1 + up*sprite->m_Y0,
 			sprite->m_Position + right*sprite->m_X1 + up*sprite->m_Y1,
 			sprite->m_Position + right*sprite->m_X0 + up*sprite->m_Y0,
 			sprite->m_Position + right*sprite->m_X1 + up*sprite->m_Y1,
 			sprite->m_Position + right*sprite->m_X0 + up*sprite->m_Y1
 		};
 
 		shader->VertexPointer(3, GL_FLOAT, sizeof(CVector3D), &position[0].X);
 
-		glDrawArrays(GL_TRIANGLES, 0, 6);
+		deviceCommandContext->Draw(0, 6);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		g_Renderer.GetStats().m_OverlayTris += 2;
 	}
 
 	tech->EndPass();
 #endif
 }
 
 static void TessellateSphereFace(const CVector3D& a, u16 ai,
 								 const CVector3D& b, u16 bi,
 								 const CVector3D& c, u16 ci,
 								 std::vector<float>& vertexes, std::vector<u16>& indexes, int level)
 {
 	if (level == 0)
 	{
 		indexes.push_back(ai);
 		indexes.push_back(bi);
 		indexes.push_back(ci);
 	}
 	else
 	{
 		CVector3D d = (a + b).Normalized();
 		CVector3D e = (b + c).Normalized();
 		CVector3D f = (c + a).Normalized();
 		int di = vertexes.size() / 3; vertexes.push_back(d.X); vertexes.push_back(d.Y); vertexes.push_back(d.Z);
 		int ei = vertexes.size() / 3; vertexes.push_back(e.X); vertexes.push_back(e.Y); vertexes.push_back(e.Z);
 		int fi = vertexes.size() / 3; vertexes.push_back(f.X); vertexes.push_back(f.Y); vertexes.push_back(f.Z);
 		TessellateSphereFace(a,ai, d,di, f,fi, vertexes, indexes, level-1);
 		TessellateSphereFace(d,di, b,bi, e,ei, vertexes, indexes, level-1);
 		TessellateSphereFace(f,fi, e,ei, c,ci, vertexes, indexes, level-1);
 		TessellateSphereFace(d,di, e,ei, f,fi, vertexes, indexes, level-1);
 	}
 }
 
 static void TessellateSphere(std::vector<float>& vertexes, std::vector<u16>& indexes, int level)
 {
 	/* Start with a tetrahedron, then tessellate */
 	float s = sqrtf(0.5f);
 #define VERT(a,b,c) vertexes.push_back(a); vertexes.push_back(b); vertexes.push_back(c);
 	VERT(-s,  0, -s);
 	VERT( s,  0, -s);
 	VERT( s,  0,  s);
 	VERT(-s,  0,  s);
 	VERT( 0, -1,  0);
 	VERT( 0,  1,  0);
 #define FACE(a,b,c) \
 	TessellateSphereFace( \
 		CVector3D(vertexes[a*3], vertexes[a*3+1], vertexes[a*3+2]), a, \
 		CVector3D(vertexes[b*3], vertexes[b*3+1], vertexes[b*3+2]), b, \
 		CVector3D(vertexes[c*3], vertexes[c*3+1], vertexes[c*3+2]), c, \
 		vertexes, indexes, level);
 	FACE(0,4,1);
 	FACE(1,4,2);
 	FACE(2,4,3);
 	FACE(3,4,0);
 	FACE(1,5,0);
 	FACE(2,5,1);
 	FACE(3,5,2);
 	FACE(0,5,3);
 #undef FACE
 #undef VERT
 }
 
 void OverlayRendererInternals::GenerateSphere()
 {
 	if (sphereVertexes.empty())
 		TessellateSphere(sphereVertexes, sphereIndexes, 3);
 }
 
 void OverlayRenderer::RenderSphereOverlays(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	PROFILE3_GPU("overlays (spheres)");
 
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 #warning TODO: implement OverlayRenderer::RenderSphereOverlays for GLES
 #else
 	if (m->spheres.empty())
 		return;
 
 	CShaderProgramPtr shader;
 	CShaderTechniquePtr tech;
 
 	tech = g_Renderer.GetShaderManager().LoadEffect(str_overlay_solid);
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		tech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthWriteEnabled = false;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	shader = tech->GetShader();
 
 	m->GenerateSphere();
 
 	shader->VertexPointer(3, GL_FLOAT, 0, &m->sphereVertexes[0]);
 
 	for (size_t i = 0; i < m->spheres.size(); ++i)
 	{
 		SOverlaySphere* sphere = m->spheres[i];
 
 		CMatrix3D transform;
 		transform.SetIdentity();
 		transform.Scale(sphere->m_Radius, sphere->m_Radius, sphere->m_Radius);
 		transform.Translate(sphere->m_Center);
 
 		shader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 		shader->Uniform(str_instancingTransform, transform);
 
 		shader->Uniform(str_color, sphere->m_Color);
 
-		glDrawElements(GL_TRIANGLES, m->sphereIndexes.size(), GL_UNSIGNED_SHORT, &m->sphereIndexes[0]);
+		deviceCommandContext->SetIndexBufferData(m->sphereIndexes.data());
+		deviceCommandContext->DrawIndexed(0, m->sphereIndexes.size(), 0);
 
 		g_Renderer.GetStats().m_DrawCalls++;
 		g_Renderer.GetStats().m_OverlayTris = m->sphereIndexes.size()/3;
 	}
 
 	tech->EndPass();
 #endif
 }
Index: ps/trunk/source/renderer/PatchRData.cpp
===================================================================
--- ps/trunk/source/renderer/PatchRData.cpp	(revision 26524)
+++ ps/trunk/source/renderer/PatchRData.cpp	(revision 26525)
@@ -1,1482 +1,1478 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/PatchRData.h"
 
 #include "graphics/GameView.h"
 #include "graphics/LightEnv.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/Patch.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TerrainTextureEntry.h"
 #include "graphics/TextRenderer.h"
 #include "graphics/TextureManager.h"
 #include "lib/allocators/DynamicArena.h"
 #include "lib/allocators/STLAllocators.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/GameSetup/Config.h"
 #include "ps/Profile.h"
 #include "ps/Pyrogenesis.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/AlphaMapCalculator.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/TerrainRenderer.h"
 #include "renderer/WaterManager.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/Simulation2.h"
 
 #include <algorithm>
 #include <numeric>
 #include <set>
 
 const ssize_t BlendOffsets[9][2] = {
 	{  0, -1 },
 	{ -1, -1 },
 	{ -1,  0 },
 	{ -1,  1 },
 	{  0,  1 },
 	{  1,  1 },
 	{  1,  0 },
 	{  1, -1 },
 	{  0,  0 }
 };
 
 CPatchRData::CPatchRData(CPatch* patch, CSimulation2* simulation) :
 	m_Patch(patch), m_Simulation(simulation)
 {
 	ENSURE(patch);
 	Build();
 }
 
 CPatchRData::~CPatchRData() = default;
 
 /**
  * Represents a blend for a single tile, texture and shape.
  */
 struct STileBlend
 {
 	CTerrainTextureEntry* m_Texture;
 	int m_Priority;
 	u16 m_TileMask; // bit n set if this blend contains neighbour tile BlendOffsets[n]
 
 	struct DecreasingPriority
 	{
 		bool operator()(const STileBlend& a, const STileBlend& b) const
 		{
 			if (a.m_Priority > b.m_Priority)
 				return true;
 			if (a.m_Priority < b.m_Priority)
 				return false;
 			if (a.m_Texture && b.m_Texture)
 				return a.m_Texture->GetTag() > b.m_Texture->GetTag();
 			return false;
 		}
 	};
 
 	struct CurrentTile
 	{
 		bool operator()(const STileBlend& a) const
 		{
 			return (a.m_TileMask & (1 << 8)) != 0;
 		}
 	};
 };
 
 /**
  * Represents the ordered collection of blends drawn on a particular tile.
  */
 struct STileBlendStack
 {
 	u8 i, j;
 	std::vector<STileBlend> blends; // back of vector is lowest-priority texture
 };
 
 /**
  * Represents a batched collection of blends using the same texture.
  */
 struct SBlendLayer
 {
 	struct Tile
 	{
 		u8 i, j;
 		u8 shape;
 	};
 
 	CTerrainTextureEntry* m_Texture;
 	std::vector<Tile> m_Tiles;
 };
 
 void CPatchRData::BuildBlends()
 {
 	PROFILE3("build blends");
 
 	m_BlendSplats.clear();
 
 	std::vector<SBlendVertex> blendVertices;
 	std::vector<u16> blendIndices;
 
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	std::vector<STileBlendStack> blendStacks;
 	blendStacks.reserve(PATCH_SIZE*PATCH_SIZE);
 
 	std::vector<STileBlend> blends;
 	blends.reserve(9);
 
 	// For each tile in patch ..
 	for (ssize_t j = 0; j < PATCH_SIZE; ++j)
 	{
 		for (ssize_t i = 0; i < PATCH_SIZE; ++i)
 		{
 			ssize_t gx = m_Patch->m_X * PATCH_SIZE + i;
 			ssize_t gz = m_Patch->m_Z * PATCH_SIZE + j;
 
 			blends.clear();
 
 			// Compute a blend for every tile in the 3x3 square around this tile
 			for (size_t n = 0; n < 9; ++n)
 			{
 				ssize_t ox = gx + BlendOffsets[n][1];
 				ssize_t oz = gz + BlendOffsets[n][0];
 
 				CMiniPatch* nmp = terrain->GetTile(ox, oz);
 				if (!nmp)
 					continue;
 
 				STileBlend blend;
 				blend.m_Texture = nmp->GetTextureEntry();
 				blend.m_Priority = nmp->GetPriority();
 				blend.m_TileMask = 1 << n;
 				blends.push_back(blend);
 			}
 
 			// Sort the blends, highest priority first
 			std::sort(blends.begin(), blends.end(), STileBlend::DecreasingPriority());
 
 			STileBlendStack blendStack;
 			blendStack.i = i;
 			blendStack.j = j;
 
 			// Put the blends into the tile's stack, merging any adjacent blends with the same texture
 			for (size_t k = 0; k < blends.size(); ++k)
 			{
 				if (!blendStack.blends.empty() && blendStack.blends.back().m_Texture == blends[k].m_Texture)
 					blendStack.blends.back().m_TileMask |= blends[k].m_TileMask;
 				else
 					blendStack.blends.push_back(blends[k]);
 			}
 
 			// Remove blends that are after (i.e. lower priority than) the current tile
 			// (including the current tile), since we don't want to render them on top of
 			// the tile's base texture
 			blendStack.blends.erase(
 				std::find_if(blendStack.blends.begin(), blendStack.blends.end(), STileBlend::CurrentTile()),
 				blendStack.blends.end());
 
 			blendStacks.push_back(blendStack);
 		}
 	}
 
 	// Given the blend stack per tile, we want to batch together as many blends as possible.
 	// Group them into a series of layers (each of which has a single texture):
 	// (This is effectively a topological sort / linearisation of the partial order induced
 	// by the per-tile stacks, preferring to make tiles with equal textures adjacent.)
 
 	std::vector<SBlendLayer> blendLayers;
 
 	while (true)
 	{
 		if (!blendLayers.empty())
 		{
 			// Try to grab as many tiles as possible that match our current layer,
 			// from off the blend stacks of all the tiles
 
 			CTerrainTextureEntry* tex = blendLayers.back().m_Texture;
 
 			for (size_t k = 0; k < blendStacks.size(); ++k)
 			{
 				if (!blendStacks[k].blends.empty() && blendStacks[k].blends.back().m_Texture == tex)
 				{
 					SBlendLayer::Tile t = { blendStacks[k].i, blendStacks[k].j, (u8)blendStacks[k].blends.back().m_TileMask };
 					blendLayers.back().m_Tiles.push_back(t);
 					blendStacks[k].blends.pop_back();
 				}
 				// (We've already merged adjacent entries of the same texture in each stack,
 				// so we don't need to bother looping to check the next entry in this stack again)
 			}
 		}
 
 		// We've grabbed as many tiles as possible; now we need to start a new layer.
 		// The new layer's texture could come from the back of any non-empty stack;
 		// choose the longest stack as a heuristic to reduce the number of layers
 		CTerrainTextureEntry* bestTex = NULL;
 		size_t bestStackSize = 0;
 
 		for (size_t k = 0; k < blendStacks.size(); ++k)
 		{
 			if (blendStacks[k].blends.size() > bestStackSize)
 			{
 				bestStackSize = blendStacks[k].blends.size();
 				bestTex = blendStacks[k].blends.back().m_Texture;
 			}
 		}
 
 		// If all our stacks were empty, we're done
 		if (bestStackSize == 0)
 			break;
 
 		// Otherwise add the new layer, then loop back and start filling it in
 
 		SBlendLayer layer;
 		layer.m_Texture = bestTex;
 		blendLayers.push_back(layer);
 	}
 
 	// Now build outgoing splats
 	m_BlendSplats.resize(blendLayers.size());
 
 	for (size_t k = 0; k < blendLayers.size(); ++k)
 	{
 		SSplat& splat = m_BlendSplats[k];
 		splat.m_IndexStart = blendIndices.size();
 		splat.m_Texture = blendLayers[k].m_Texture;
 
 		for (size_t t = 0; t < blendLayers[k].m_Tiles.size(); ++t)
 		{
 			SBlendLayer::Tile& tile = blendLayers[k].m_Tiles[t];
 			AddBlend(blendVertices, blendIndices, tile.i, tile.j, tile.shape, splat.m_Texture);
 		}
 
 		splat.m_IndexCount = blendIndices.size() - splat.m_IndexStart;
 	}
 
 	// Release existing vertex buffer chunks
 	m_VBBlends.Reset();
 	m_VBBlendIndices.Reset();
 
 	if (blendVertices.size())
 	{
 		// Construct vertex buffer
 
 		m_VBBlends = g_VBMan.AllocateChunk(
 			sizeof(SBlendVertex), blendVertices.size(),
 			Renderer::Backend::GL::CBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::TERRAIN);
 		m_VBBlends->m_Owner->UpdateChunkVertices(m_VBBlends.Get(), &blendVertices[0]);
 
 		// Update the indices to include the base offset of the vertex data
 		for (size_t k = 0; k < blendIndices.size(); ++k)
 			blendIndices[k] += static_cast<u16>(m_VBBlends->m_Index);
 
 		m_VBBlendIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), blendIndices.size(),
 			Renderer::Backend::GL::CBuffer::Type::INDEX, false,
 			nullptr, CVertexBufferManager::Group::TERRAIN);
 		m_VBBlendIndices->m_Owner->UpdateChunkVertices(m_VBBlendIndices.Get(), &blendIndices[0]);
 	}
 }
 
 void CPatchRData::AddBlend(std::vector<SBlendVertex>& blendVertices, std::vector<u16>& blendIndices,
 			   u16 i, u16 j, u8 shape, CTerrainTextureEntry* texture)
 {
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	ssize_t gx = m_Patch->m_X * PATCH_SIZE + i;
 	ssize_t gz = m_Patch->m_Z * PATCH_SIZE + j;
 
 	// uses the current neighbour texture
 	BlendShape8 shape8;
 	for (size_t m = 0; m < 8; ++m)
 		shape8[m] = (shape & (1 << m)) ? 0 : 1;
 
 	// calculate the required alphamap and the required rotation of the alphamap from blendshape
 	unsigned int alphamapflags;
 	int alphamap = CAlphaMapCalculator::Calculate(shape8, alphamapflags);
 
 	// now actually render the blend tile (if we need one)
 	if (alphamap == -1)
 		return;
 
 	float u0 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].u0;
 	float u1 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].u1;
 	float v0 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].v0;
 	float v1 = texture->m_TerrainAlpha->second.m_AlphaMapCoords[alphamap].v1;
 
 	if (alphamapflags & BLENDMAP_FLIPU)
 		std::swap(u0, u1);
 
 	if (alphamapflags & BLENDMAP_FLIPV)
 		std::swap(v0, v1);
 
 	int base = 0;
 	if (alphamapflags & BLENDMAP_ROTATE90)
 		base = 1;
 	else if (alphamapflags & BLENDMAP_ROTATE180)
 		base = 2;
 	else if (alphamapflags & BLENDMAP_ROTATE270)
 		base = 3;
 
 	SBlendVertex vtx[4];
 	vtx[(base + 0) % 4].m_AlphaUVs[0] = u0;
 	vtx[(base + 0) % 4].m_AlphaUVs[1] = v0;
 	vtx[(base + 1) % 4].m_AlphaUVs[0] = u1;
 	vtx[(base + 1) % 4].m_AlphaUVs[1] = v0;
 	vtx[(base + 2) % 4].m_AlphaUVs[0] = u1;
 	vtx[(base + 2) % 4].m_AlphaUVs[1] = v1;
 	vtx[(base + 3) % 4].m_AlphaUVs[0] = u0;
 	vtx[(base + 3) % 4].m_AlphaUVs[1] = v1;
 
 	SBlendVertex dst;
 
 	CVector3D normal;
 
 	u16 index = static_cast<u16>(blendVertices.size());
 
 	terrain->CalcPosition(gx, gz, dst.m_Position);
 	terrain->CalcNormal(gx, gz, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[0].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[0].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	terrain->CalcPosition(gx + 1, gz, dst.m_Position);
 	terrain->CalcNormal(gx + 1, gz, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[1].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[1].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	terrain->CalcPosition(gx + 1, gz + 1, dst.m_Position);
 	terrain->CalcNormal(gx + 1, gz + 1, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[2].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[2].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	terrain->CalcPosition(gx, gz + 1, dst.m_Position);
 	terrain->CalcNormal(gx, gz + 1, normal);
 	dst.m_Normal = normal;
 	dst.m_AlphaUVs[0] = vtx[3].m_AlphaUVs[0];
 	dst.m_AlphaUVs[1] = vtx[3].m_AlphaUVs[1];
 	blendVertices.push_back(dst);
 
 	bool dir = terrain->GetTriangulationDir(gx, gz);
 	if (dir)
 	{
 		blendIndices.push_back(index+0);
 		blendIndices.push_back(index+1);
 		blendIndices.push_back(index+3);
 
 		blendIndices.push_back(index+1);
 		blendIndices.push_back(index+2);
 		blendIndices.push_back(index+3);
 	}
 	else
 	{
 		blendIndices.push_back(index+0);
 		blendIndices.push_back(index+1);
 		blendIndices.push_back(index+2);
 
 		blendIndices.push_back(index+2);
 		blendIndices.push_back(index+3);
 		blendIndices.push_back(index+0);
 	}
 }
 
 void CPatchRData::BuildIndices()
 {
 	PROFILE3("build indices");
 
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	ssize_t px = m_Patch->m_X * PATCH_SIZE;
 	ssize_t pz = m_Patch->m_Z * PATCH_SIZE;
 
 	// must have allocated some vertices before trying to build corresponding indices
 	ENSURE(m_VBBase);
 
 	// number of vertices in each direction in each patch
 	ssize_t vsize=PATCH_SIZE+1;
 
 	// PATCH_SIZE must be 2^8-2 or less to not overflow u16 indices buffer. Thankfully this is always true.
 	ENSURE(vsize*vsize < 65536);
 
 	std::vector<unsigned short> indices;
 	indices.reserve(PATCH_SIZE * PATCH_SIZE * 4);
 
 	// release existing splats
 	m_Splats.clear();
 
 	// build grid of textures on this patch
 	std::vector<CTerrainTextureEntry*> textures;
 	CTerrainTextureEntry* texgrid[PATCH_SIZE][PATCH_SIZE];
 	for (ssize_t j=0;j<PATCH_SIZE;j++) {
 		for (ssize_t i=0;i<PATCH_SIZE;i++) {
 			CTerrainTextureEntry* tex=m_Patch->m_MiniPatches[j][i].GetTextureEntry();
 			texgrid[j][i]=tex;
 			if (std::find(textures.begin(),textures.end(),tex)==textures.end()) {
 				textures.push_back(tex);
 			}
 		}
 	}
 
 	// now build base splats from interior textures
 	m_Splats.resize(textures.size());
 	// build indices for base splats
 	size_t base=m_VBBase->m_Index;
 
 	for (size_t k = 0; k < m_Splats.size(); ++k)
 	{
 		CTerrainTextureEntry* tex = textures[k];
 
 		SSplat& splat=m_Splats[k];
 		splat.m_Texture=tex;
 		splat.m_IndexStart=indices.size();
 
 		for (ssize_t j = 0; j < PATCH_SIZE; j++)
 		{
 			for (ssize_t i = 0; i < PATCH_SIZE; i++)
 			{
 				if (texgrid[j][i] == tex)
 				{
 					bool dir = terrain->GetTriangulationDir(px+i, pz+j);
 					if (dir)
 					{
 						indices.push_back(u16(((j+0)*vsize+(i+0))+base));
 						indices.push_back(u16(((j+0)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+0))+base));
 
 						indices.push_back(u16(((j+0)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+0))+base));
 					}
 					else
 					{
 						indices.push_back(u16(((j+0)*vsize+(i+0))+base));
 						indices.push_back(u16(((j+0)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+1))+base));
 
 						indices.push_back(u16(((j+1)*vsize+(i+1))+base));
 						indices.push_back(u16(((j+1)*vsize+(i+0))+base));
 						indices.push_back(u16(((j+0)*vsize+(i+0))+base));
 					}
 				}
 			}
 		}
 		splat.m_IndexCount=indices.size()-splat.m_IndexStart;
 	}
 
 	// Release existing vertex buffer chunk
 	m_VBBaseIndices.Reset();
 
 	ENSURE(indices.size());
 
 	// Construct vertex buffer
 	m_VBBaseIndices = g_VBMan.AllocateChunk(
 		sizeof(u16), indices.size(),
 		Renderer::Backend::GL::CBuffer::Type::INDEX, false, nullptr, CVertexBufferManager::Group::TERRAIN);
 	m_VBBaseIndices->m_Owner->UpdateChunkVertices(m_VBBaseIndices.Get(), &indices[0]);
 }
 
 
 void CPatchRData::BuildVertices()
 {
 	PROFILE3("build vertices");
 
 	// create both vertices and lighting colors
 
 	// number of vertices in each direction in each patch
 	ssize_t vsize = PATCH_SIZE + 1;
 
 	std::vector<SBaseVertex> vertices;
 	vertices.resize(vsize * vsize);
 
 	// get index of this patch
 	ssize_t px = m_Patch->m_X;
 	ssize_t pz = m_Patch->m_Z;
 
 	CTerrain* terrain = m_Patch->m_Parent;
 
 	// build vertices
 	for (ssize_t j = 0; j < vsize; ++j)
 	{
 		for (ssize_t i = 0; i < vsize; ++i)
 		{
 			ssize_t ix = px * PATCH_SIZE + i;
 			ssize_t iz = pz * PATCH_SIZE + j;
 			ssize_t v = j * vsize + i;
 
 			// calculate vertex data
 			terrain->CalcPosition(ix, iz, vertices[v].m_Position);
 
 			CVector3D normal;
 			terrain->CalcNormal(ix, iz, normal);
 			vertices[v].m_Normal = normal;
 		}
 	}
 
 	// upload to vertex buffer
 	if (!m_VBBase)
 	{
 		m_VBBase = g_VBMan.AllocateChunk(
 			sizeof(SBaseVertex), vsize * vsize,
 			Renderer::Backend::GL::CBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::TERRAIN);
 	}
 
 	m_VBBase->m_Owner->UpdateChunkVertices(m_VBBase.Get(), &vertices[0]);
 }
 
 void CPatchRData::BuildSide(std::vector<SSideVertex>& vertices, CPatchSideFlags side)
 {
 	ssize_t vsize = PATCH_SIZE + 1;
 	CTerrain* terrain = m_Patch->m_Parent;
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*m_Simulation, SYSTEM_ENTITY);
 
 	for (ssize_t k = 0; k < vsize; k++)
 	{
 		ssize_t gx = m_Patch->m_X * PATCH_SIZE;
 		ssize_t gz = m_Patch->m_Z * PATCH_SIZE;
 		switch (side)
 		{
 		case CPATCH_SIDE_NEGX: gz += k; break;
 		case CPATCH_SIDE_POSX: gx += PATCH_SIZE; gz += PATCH_SIZE-k; break;
 		case CPATCH_SIDE_NEGZ: gx += PATCH_SIZE-k; break;
 		case CPATCH_SIDE_POSZ: gz += PATCH_SIZE; gx += k; break;
 		}
 
 		CVector3D pos;
 		terrain->CalcPosition(gx, gz, pos);
 
 		// Clamp the height to the water level
 		float waterHeight = 0.f;
 		if (cmpWaterManager)
 			waterHeight = cmpWaterManager->GetExactWaterLevel(pos.X, pos.Z);
 		pos.Y = std::max(pos.Y, waterHeight);
 
 		SSideVertex v0, v1;
 		v0.m_Position = pos;
 		v1.m_Position = pos;
 		v1.m_Position.Y = 0;
 
 		if (k == 0)
 		{
 			vertices.emplace_back(v1);
 			vertices.emplace_back(v0);
 		}
 		if (k > 0)
 		{
 			const size_t lastIndex = vertices.size() - 1;
 			vertices.emplace_back(v1);
 			vertices.emplace_back(vertices[lastIndex]);
 			vertices.emplace_back(v0);
 			vertices.emplace_back(v1);
 			if (k + 1 < vsize)
 			{
 				vertices.emplace_back(v1);
 				vertices.emplace_back(v0);
 			}
 		}
 	}
 }
 
 void CPatchRData::BuildSides()
 {
 	PROFILE3("build sides");
 
 	std::vector<SSideVertex> sideVertices;
 
 	int sideFlags = m_Patch->GetSideFlags();
 
 	// If no sides are enabled, we don't need to do anything
 	if (!sideFlags)
 		return;
 
 	// For each side, generate a tristrip by adding a vertex at ground/water
 	// level and a vertex underneath at height 0.
 
 	if (sideFlags & CPATCH_SIDE_NEGX)
 		BuildSide(sideVertices, CPATCH_SIDE_NEGX);
 
 	if (sideFlags & CPATCH_SIDE_POSX)
 		BuildSide(sideVertices, CPATCH_SIDE_POSX);
 
 	if (sideFlags & CPATCH_SIDE_NEGZ)
 		BuildSide(sideVertices, CPATCH_SIDE_NEGZ);
 
 	if (sideFlags & CPATCH_SIDE_POSZ)
 		BuildSide(sideVertices, CPATCH_SIDE_POSZ);
 
 	if (sideVertices.empty())
 		return;
 
 	if (!m_VBSides)
 	{
 		m_VBSides = g_VBMan.AllocateChunk(
 			sizeof(SSideVertex), sideVertices.size(),
 			Renderer::Backend::GL::CBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::DEFAULT);
 	}
 	m_VBSides->m_Owner->UpdateChunkVertices(m_VBSides.Get(), &sideVertices[0]);
 }
 
 void CPatchRData::Build()
 {
 	BuildVertices();
 	BuildSides();
 	BuildIndices();
 	BuildBlends();
 	BuildWater();
 }
 
 void CPatchRData::Update(CSimulation2* simulation)
 {
 	m_Simulation = simulation;
 	if (m_UpdateFlags!=0) {
 		// TODO,RC 11/04/04 - need to only rebuild necessary bits of renderdata rather
 		// than everything; it's complicated slightly because the blends are dependent
 		// on both vertex and index data
 		BuildVertices();
 		BuildSides();
 		BuildIndices();
 		BuildBlends();
 		BuildWater();
 
 		m_UpdateFlags=0;
 	}
 }
 
-// Types used for glMultiDrawElements batching:
-
 // To minimise the cost of memory allocations, everything used for computing
 // batches uses a arena allocator. (All allocations are short-lived so we can
 // just throw away the whole arena at the end of each frame.)
 
 using Arena = Allocators::DynamicArena<1 * MiB>;
 
 // std::map types with appropriate arena allocators and default comparison operator
 template<class Key, class Value>
 using PooledBatchMap = std::map<Key, Value, std::less<Key>, ProxyAllocator<std::pair<Key const, Value>, Arena>>;
 
 // Equivalent to "m[k]", when it returns a arena-allocated std::map (since we can't
 // use the default constructor in that case)
 template<typename M>
 typename M::mapped_type& PooledMapGet(M& m, const typename M::key_type& k, Arena& arena)
 {
 	return m.insert(std::make_pair(k,
 		typename M::mapped_type(typename M::mapped_type::key_compare(), typename M::mapped_type::allocator_type(arena))
 	)).first->second;
 }
 
 // Equivalent to "m[k]", when it returns a std::pair of arena-allocated std::vectors
 template<typename M>
 typename M::mapped_type& PooledPairGet(M& m, const typename M::key_type& k, Arena& arena)
 {
 	return m.insert(std::make_pair(k, std::make_pair(
 			typename M::mapped_type::first_type(typename M::mapped_type::first_type::allocator_type(arena)),
 			typename M::mapped_type::second_type(typename M::mapped_type::second_type::allocator_type(arena))
 	))).first->second;
 }
 
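-// Each multidraw batch has a list of index counts, and a list of pointers-to-first-indexes
+// Each batch has a list of index counts, and a list of first-index offsets (counted in indices, not bytes)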
-using BatchElements = std::pair<std::vector<GLint, ProxyAllocator<GLint, Arena>>, std::vector<void*, ProxyAllocator<void*, Arena>>>;
+using BatchElements = std::pair<std::vector<GLint, ProxyAllocator<GLint, Arena>>, std::vector<u32, ProxyAllocator<u32, Arena>>>;
 
 // Group batches by index buffer
 using IndexBufferBatches = PooledBatchMap<CVertexBuffer*, BatchElements>;
 
 // Group batches by vertex buffer
 using VertexBufferBatches = PooledBatchMap<CVertexBuffer*, IndexBufferBatches>;
 
 // Group batches by texture
 using TextureBatches = PooledBatchMap<CTerrainTextureEntry*, VertexBufferBatches>;
 
 // Group batches by shaders.
 using ShaderTechniqueBatches = PooledBatchMap<CShaderTechniquePtr, TextureBatches>;
 
 void CPatchRData::RenderBases(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const CShaderDefines& context, ShadowMap* shadow)
 {
 	PROFILE3("render terrain bases");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain bases");
 
 	Arena arena;
 
 	ShaderTechniqueBatches batches(ShaderTechniqueBatches::key_compare(), (ShaderTechniqueBatches::allocator_type(arena)));
 
 	PROFILE_START("compute batches");
 
 	// Collect all the patches' base splats into their appropriate batches
 	for (size_t i = 0; i < patches.size(); ++i)
 	{
 		CPatchRData* patch = patches[i];
 		for (size_t j = 0; j < patch->m_Splats.size(); ++j)
 		{
 			SSplat& splat = patch->m_Splats[j];
 			const CMaterial& material = splat.m_Texture->GetMaterial();
 			if (material.GetShaderEffect().empty())
 			{
 				LOGERROR("Terrain renderer failed to load shader effect.\n");
 				continue;
 			}
 			CShaderDefines defines = context;
 			defines.SetMany(material.GetShaderDefines(0));
 			CShaderTechniquePtr techBase = g_Renderer.GetShaderManager().LoadEffect(
 				material.GetShaderEffect(), defines);
 
 			BatchElements& batch = PooledPairGet(
 				PooledMapGet(
 					PooledMapGet(
 						PooledMapGet(batches, techBase, arena),
 						splat.m_Texture, arena
 					),
 					patch->m_VBBase->m_Owner, arena
 				),
 				patch->m_VBBaseIndices->m_Owner, arena
 			);
 
 			batch.first.push_back(splat.m_IndexCount);
 
-			u8* indexBase = nullptr;
-			batch.second.push_back(indexBase + sizeof(u16)*(patch->m_VBBaseIndices->m_Index + splat.m_IndexStart));
+			batch.second.push_back(patch->m_VBBaseIndices->m_Index + splat.m_IndexStart);
 		}
 	}
 
 	PROFILE_END("compute batches");
 
 	// Render each batch
 	for (ShaderTechniqueBatches::iterator itTech = batches.begin(); itTech != batches.end(); ++itTech)
 	{
 		const CShaderTechniquePtr& techBase = itTech->first;
 		const int numPasses = techBase->GetNumPasses();
 		for (int pass = 0; pass < numPasses; ++pass)
 		{
 			techBase->BeginPass(pass);
 			deviceCommandContext->SetGraphicsPipelineState(
 				techBase->GetGraphicsPipelineStateDesc(pass));
 			const CShaderProgramPtr& shader = techBase->GetShader(pass);
 			TerrainRenderer::PrepareShader(shader, shadow);
 
 			TextureBatches& textureBatches = itTech->second;
 			for (TextureBatches::iterator itt = textureBatches.begin(); itt != textureBatches.end(); ++itt)
 			{
 				if (!itt->first->GetMaterial().GetSamplers().empty())
 				{
 					const CMaterial::SamplersVector& samplers = itt->first->GetMaterial().GetSamplers();
 					for(const CMaterial::TextureSampler& samp : samplers)
 						samp.Sampler->UploadBackendTextureIfNeeded(deviceCommandContext);
 					for(const CMaterial::TextureSampler& samp : samplers)
 						shader->BindTexture(samp.Name, samp.Sampler->GetBackendTexture());
 
 					itt->first->GetMaterial().GetStaticUniforms().BindUniforms(shader);
 
 					float c = itt->first->GetTextureMatrix()[0];
 					float ms = itt->first->GetTextureMatrix()[8];
 					shader->Uniform(str_textureTransform, c, ms, -ms, 0.f);
 				}
 				else
 				{
 					shader->BindTexture(str_baseTex, g_Renderer.GetTextureManager().GetErrorTexture()->GetBackendTexture());
 				}
 
 				for (VertexBufferBatches::iterator itv = itt->second.begin(); itv != itt->second.end(); ++itv)
 				{
 					GLsizei stride = sizeof(SBaseVertex);
 					SBaseVertex *base = (SBaseVertex *)itv->first->Bind(deviceCommandContext);
 					shader->VertexPointer(3, GL_FLOAT, stride, &base->m_Position[0]);
 					shader->NormalPointer(GL_FLOAT, stride, &base->m_Normal[0]);
 					shader->TexCoordPointer(GL_TEXTURE0, 3, GL_FLOAT, stride, &base->m_Position[0]);
 
 					shader->AssertPointersBound();
 
 					for (IndexBufferBatches::iterator it = itv->second.begin(); it != itv->second.end(); ++it)
 					{
-						it->first->Bind(deviceCommandContext);
+						it->first->UploadIfNeeded(deviceCommandContext);
+						deviceCommandContext->SetIndexBuffer(it->first->GetBuffer());
 
 						BatchElements& batch = it->second;
 
-						// Don't use glMultiDrawElements here since it doesn't have a significant
-						// performance impact and it suffers from various driver bugs (e.g. it breaks
-						// in Mesa 7.10 swrast with index VBOs)
 						for (size_t i = 0; i < batch.first.size(); ++i)
-							glDrawElements(GL_TRIANGLES, batch.first[i], GL_UNSIGNED_SHORT, batch.second[i]);
+							deviceCommandContext->DrawIndexed(batch.second[i], batch.first[i], 0);
 
 						g_Renderer.m_Stats.m_DrawCalls++;
 						g_Renderer.m_Stats.m_TerrainTris += std::accumulate(batch.first.begin(), batch.first.end(), 0) / 3;
 					}
 				}
 			}
 			techBase->EndPass();
 		}
 	}
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 /**
  * Helper structure for RenderBlends.
  */
 struct SBlendBatch
 {
 	SBlendBatch(Arena& arena) :
 		m_Batches(VertexBufferBatches::key_compare(), VertexBufferBatches::allocator_type(arena))
 	{
 	}
 
 	CTerrainTextureEntry* m_Texture;
 	CShaderTechniquePtr m_ShaderTech;
 	VertexBufferBatches m_Batches;
 };
 
 /**
  * Helper structure for RenderBlends.
  */
 struct SBlendStackItem
 {
 	SBlendStackItem(CVertexBuffer::VBChunk* v, CVertexBuffer::VBChunk* i,
 			const std::vector<CPatchRData::SSplat>& s, Arena& arena) :
 		vertices(v), indices(i), splats(s.begin(), s.end(), SplatStack::allocator_type(arena))
 	{
 	}
 
 	using SplatStack = std::vector<CPatchRData::SSplat, ProxyAllocator<CPatchRData::SSplat, Arena>>;
 	CVertexBuffer::VBChunk* vertices;
 	CVertexBuffer::VBChunk* indices;
 	SplatStack splats;
 };
 
 void CPatchRData::RenderBlends(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const CShaderDefines& context, ShadowMap* shadow)
 {
 	PROFILE3("render terrain blends");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain blends");
 
 	Arena arena;
 
 	using BatchesStack = std::vector<SBlendBatch, ProxyAllocator<SBlendBatch, Arena>>;
 	BatchesStack batches((BatchesStack::allocator_type(arena)));
 
 	CShaderDefines contextBlend = context;
 	contextBlend.Add(str_BLEND, str_1);
 
  	PROFILE_START("compute batches");
 
  	// Reserve an arbitrary size that's probably big enough in most cases,
  	// to avoid heavy reallocations
  	batches.reserve(256);
 
 	using BlendStacks = std::vector<SBlendStackItem, ProxyAllocator<SBlendStackItem, Arena>>;
 	BlendStacks blendStacks((BlendStacks::allocator_type(arena)));
 	blendStacks.reserve(patches.size());
 
 	// Extract all the blend splats from each patch
  	for (size_t i = 0; i < patches.size(); ++i)
  	{
  		CPatchRData* patch = patches[i];
  		if (!patch->m_BlendSplats.empty())
  		{
 
  			blendStacks.push_back(SBlendStackItem(patch->m_VBBlends.Get(), patch->m_VBBlendIndices.Get(), patch->m_BlendSplats, arena));
  			// Reverse the splats so the first to be rendered is at the back of the list
  			std::reverse(blendStacks.back().splats.begin(), blendStacks.back().splats.end());
  		}
  	}
 
  	// Rearrange the collection of splats to be grouped by texture, preserving
  	// order of splats within each patch:
  	// (This is exactly the same algorithm used in CPatchRData::BuildBlends,
  	// but applied to patch-sized splats rather than to tile-sized splats;
  	// see that function for comments on the algorithm.)
 	while (true)
 	{
 		if (!batches.empty())
 		{
 			CTerrainTextureEntry* tex = batches.back().m_Texture;
 
 			for (size_t k = 0; k < blendStacks.size(); ++k)
 			{
 				SBlendStackItem::SplatStack& splats = blendStacks[k].splats;
 				if (!splats.empty() && splats.back().m_Texture == tex)
 				{
 					CVertexBuffer::VBChunk* vertices = blendStacks[k].vertices;
 					CVertexBuffer::VBChunk* indices = blendStacks[k].indices;
 
 					BatchElements& batch = PooledPairGet(PooledMapGet(batches.back().m_Batches, vertices->m_Owner, arena), indices->m_Owner, arena);
 					batch.first.push_back(splats.back().m_IndexCount);
 
-		 			u8* indexBase = nullptr;
-		 			batch.second.push_back(indexBase + sizeof(u16)*(indices->m_Index + splats.back().m_IndexStart));
+		 			batch.second.push_back(indices->m_Index + splats.back().m_IndexStart);
 
 					splats.pop_back();
 				}
 			}
 		}
 
 		CTerrainTextureEntry* bestTex = NULL;
 		size_t bestStackSize = 0;
 
 		for (size_t k = 0; k < blendStacks.size(); ++k)
 		{
 			SBlendStackItem::SplatStack& splats = blendStacks[k].splats;
 			if (splats.size() > bestStackSize)
 			{
 				bestStackSize = splats.size();
 				bestTex = splats.back().m_Texture;
 			}
 		}
 
 		if (bestStackSize == 0)
 			break;
 
 		SBlendBatch layer(arena);
 		layer.m_Texture = bestTex;
 		if (!bestTex->GetMaterial().GetSamplers().empty())
 		{
 			CShaderDefines defines = contextBlend;
 			defines.SetMany(bestTex->GetMaterial().GetShaderDefines(0));
 			layer.m_ShaderTech = g_Renderer.GetShaderManager().LoadEffect(
 				bestTex->GetMaterial().GetShaderEffect(), defines);
 		}
 		batches.push_back(layer);
 	}
 
 	PROFILE_END("compute batches");
 
 	CVertexBuffer* lastVB = nullptr;
 	CShaderProgramPtr previousShader;
 	for (BatchesStack::iterator itTechBegin = batches.begin(), itTechEnd = batches.begin(); itTechBegin != batches.end(); itTechBegin = itTechEnd)
 	{
 		while (itTechEnd != batches.end() && itTechEnd->m_ShaderTech == itTechBegin->m_ShaderTech)
 			++itTechEnd;
 
 		const CShaderTechniquePtr& techBase = itTechBegin->m_ShaderTech;
 		const int numPasses = techBase->GetNumPasses();
 		for (int pass = 0; pass < numPasses; ++pass)
 		{
 			Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 				techBase->GetGraphicsPipelineStateDesc(pass);
 			pipelineStateDesc.blendState.enabled = true;
 			pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::SRC_ALPHA;
 			pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 				Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 			pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 				Renderer::Backend::BlendOp::ADD;
 			techBase->BeginPass(pass);
 			deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 			const CShaderProgramPtr& shader = techBase->GetShader(pass);
 			TerrainRenderer::PrepareShader(shader, shadow);
 
 			Renderer::Backend::GL::CTexture* lastBlendTex = nullptr;
 
 			for (BatchesStack::iterator itt = itTechBegin; itt != itTechEnd; ++itt)
 			{
 				if (itt->m_Texture->GetMaterial().GetSamplers().empty())
 					continue;
 
 				if (itt->m_Texture)
 				{
 					const CMaterial::SamplersVector& samplers = itt->m_Texture->GetMaterial().GetSamplers();
 					for (const CMaterial::TextureSampler& samp : samplers)
 						samp.Sampler->UploadBackendTextureIfNeeded(deviceCommandContext);
 					for (const CMaterial::TextureSampler& samp : samplers)
 						shader->BindTexture(samp.Name, samp.Sampler->GetBackendTexture());
 
 					Renderer::Backend::GL::CTexture* currentBlendTex = itt->m_Texture->m_TerrainAlpha->second.m_CompositeAlphaMap.get();
 					if (currentBlendTex != lastBlendTex)
 					{
 						shader->BindTexture(str_blendTex, currentBlendTex);
 						lastBlendTex = currentBlendTex;
 					}
 
 					itt->m_Texture->GetMaterial().GetStaticUniforms().BindUniforms(shader);
 
 					float c = itt->m_Texture->GetTextureMatrix()[0];
 					float ms = itt->m_Texture->GetTextureMatrix()[8];
 					shader->Uniform(str_textureTransform, c, ms, -ms, 0.f);
 				}
 				else
 				{
 					shader->BindTexture(str_baseTex, g_Renderer.GetTextureManager().GetErrorTexture()->GetBackendTexture());
 				}
 
 				for (VertexBufferBatches::iterator itv = itt->m_Batches.begin(); itv != itt->m_Batches.end(); ++itv)
 				{
 					// Rebind the VB only if it changed since the last batch
 					if (itv->first != lastVB || shader != previousShader)
 					{
 						lastVB = itv->first;
 						previousShader = shader;
 						GLsizei stride = sizeof(SBlendVertex);
 						SBlendVertex *base = (SBlendVertex *)itv->first->Bind(deviceCommandContext);
 
 						shader->VertexPointer(3, GL_FLOAT, stride, &base->m_Position[0]);
 						shader->NormalPointer(GL_FLOAT, stride, &base->m_Normal[0]);
 						shader->TexCoordPointer(GL_TEXTURE0, 3, GL_FLOAT, stride, &base->m_Position[0]);
 						shader->TexCoordPointer(GL_TEXTURE1, 2, GL_FLOAT, stride, &base->m_AlphaUVs[0]);
 					}
 
 					shader->AssertPointersBound();
 
 					for (IndexBufferBatches::iterator it = itv->second.begin(); it != itv->second.end(); ++it)
 					{
-						it->first->Bind(deviceCommandContext);
+						it->first->UploadIfNeeded(deviceCommandContext);
+						deviceCommandContext->SetIndexBuffer(it->first->GetBuffer());
 
 						BatchElements& batch = it->second;
 
 						for (size_t i = 0; i < batch.first.size(); ++i)
-							glDrawElements(GL_TRIANGLES, batch.first[i], GL_UNSIGNED_SHORT, batch.second[i]);
+							deviceCommandContext->DrawIndexed(batch.second[i], batch.first[i], 0);
 
 						g_Renderer.m_Stats.m_DrawCalls++;
 						g_Renderer.m_Stats.m_BlendSplats++;
 						g_Renderer.m_Stats.m_TerrainTris += std::accumulate(batch.first.begin(), batch.first.end(), 0) / 3;
 					}
 				}
 			}
 			techBase->EndPass();
 		}
 	}
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void CPatchRData::RenderStreams(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const CShaderProgramPtr& shader, int streamflags)
 {
 	PROFILE3("render terrain streams");
 
-	// Each batch has a list of index counts, and a list of pointers-to-first-indexes
+	// Each batch has a list of index counts, and a list of first-index offsets (in index units)
-	using StreamBatchElements = std::pair<std::vector<GLint>, std::vector<void*> > ;
+	using StreamBatchElements = std::pair<std::vector<GLint>, std::vector<u32>>;
 
 	// Group batches by index buffer
-	using StreamIndexBufferBatches = std::map<CVertexBuffer*, StreamBatchElements> ;
+	using StreamIndexBufferBatches = std::map<CVertexBuffer*, StreamBatchElements>;
 
 	// Group batches by vertex buffer
-	using StreamVertexBufferBatches = std::map<CVertexBuffer*, StreamIndexBufferBatches> ;
+	using StreamVertexBufferBatches = std::map<CVertexBuffer*, StreamIndexBufferBatches>;
 
 	StreamVertexBufferBatches batches;
 
  	PROFILE_START("compute batches");
 
  	// Collect all the patches into their appropriate batches
 	for (const CPatchRData* patch : patches)
 	{
 		StreamBatchElements& batch = batches[patch->m_VBBase->m_Owner][patch->m_VBBaseIndices->m_Owner];
 
 		batch.first.push_back(patch->m_VBBaseIndices->m_Count);
 
-		u8* indexBase = nullptr;
- 		batch.second.push_back(indexBase + sizeof(u16)*(patch->m_VBBaseIndices->m_Index));
+ 		batch.second.push_back(patch->m_VBBaseIndices->m_Index);
  	}
 
  	PROFILE_END("compute batches");
 
 	ENSURE(!(streamflags & ~(STREAM_POS|STREAM_POSTOUV0|STREAM_POSTOUV1)));
 
  	// Render each batch
 	for (const std::pair<CVertexBuffer* const, StreamIndexBufferBatches>& streamBatch : batches)
 	{
 		GLsizei stride = sizeof(SBaseVertex);
 		SBaseVertex *base = (SBaseVertex *)streamBatch.first->Bind(deviceCommandContext);
 
 		shader->VertexPointer(3, GL_FLOAT, stride, &base->m_Position);
 		if (streamflags & STREAM_POSTOUV0)
 			shader->TexCoordPointer(GL_TEXTURE0, 3, GL_FLOAT, stride, &base->m_Position);
 		if (streamflags & STREAM_POSTOUV1)
 			shader->TexCoordPointer(GL_TEXTURE1, 3, GL_FLOAT, stride, &base->m_Position);
 
 		shader->AssertPointersBound();
 
 		for (const std::pair<CVertexBuffer* const, StreamBatchElements>& batchIndexBuffer : streamBatch.second)
 		{
-			batchIndexBuffer.first->Bind(deviceCommandContext);
+			batchIndexBuffer.first->UploadIfNeeded(deviceCommandContext);
+			deviceCommandContext->SetIndexBuffer(batchIndexBuffer.first->GetBuffer());
 
 			const StreamBatchElements& batch = batchIndexBuffer.second;
 
 			for (size_t i = 0; i < batch.first.size(); ++i)
-				glDrawElements(GL_TRIANGLES, batch.first[i], GL_UNSIGNED_SHORT, batch.second[i]);
+				deviceCommandContext->DrawIndexed(batch.second[i], batch.first[i], 0);
 
 			g_Renderer.m_Stats.m_DrawCalls++;
 			g_Renderer.m_Stats.m_TerrainTris += std::accumulate(batch.first.begin(), batch.first.end(), 0) / 3;
 		}
 	}
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void CPatchRData::RenderOutline()
 {
 	CTerrain* terrain = m_Patch->m_Parent;
 	ssize_t gx = m_Patch->m_X * PATCH_SIZE;
 	ssize_t gz = m_Patch->m_Z * PATCH_SIZE;
 
 	CVector3D pos;
 	std::vector<CVector3D> line;
 	for (ssize_t i = 0, j = 0; i <= PATCH_SIZE; ++i)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 	for (ssize_t i = PATCH_SIZE, j = 1; j <= PATCH_SIZE; ++j)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 	for (ssize_t i = PATCH_SIZE-1, j = PATCH_SIZE; i >= 0; --i)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 	for (ssize_t i = 0, j = PATCH_SIZE-1; j >= 0; --j)
 	{
 		terrain->CalcPosition(gx + i, gz + j, pos);
 		line.push_back(pos);
 	}
 
 	g_Renderer.GetDebugRenderer().DrawLine(line, CColor(0.0f, 0.0f, 1.0f, 1.0f), 0.1f);
 }
 
 void CPatchRData::RenderSides(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const std::vector<CPatchRData*>& patches, const CShaderProgramPtr& shader)
 {
 	PROFILE3("render terrain sides");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain sides");
 
 	CVertexBuffer* lastVB = nullptr;
 	for (CPatchRData* patch : patches)
 	{
 		ENSURE(patch->m_UpdateFlags == 0);
 		if (!patch->m_VBSides)
 			continue;
 		if (lastVB != patch->m_VBSides->m_Owner)
 		{
 			lastVB = patch->m_VBSides->m_Owner;
 			SSideVertex *base = (SSideVertex*)patch->m_VBSides->m_Owner->Bind(deviceCommandContext);
 
 			// setup data pointers
 			GLsizei stride = sizeof(SSideVertex);
 			shader->VertexPointer(3, GL_FLOAT, stride, &base->m_Position);
 		}
 
 		shader->AssertPointersBound();
 
-		glDrawArrays(GL_TRIANGLES, patch->m_VBSides->m_Index, (GLsizei)patch->m_VBSides->m_Count);
+		deviceCommandContext->Draw(patch->m_VBSides->m_Index, (GLsizei)patch->m_VBSides->m_Count);
 
 		// bump stats
 		g_Renderer.m_Stats.m_DrawCalls++;
 		g_Renderer.m_Stats.m_TerrainTris += patch->m_VBSides->m_Count / 3;
 	}
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void CPatchRData::RenderPriorities(CTextRenderer& textRenderer)
 {
 	CTerrain* terrain = m_Patch->m_Parent;
 	const CCamera& camera = *(g_Game->GetView()->GetCamera());
 
 	for (ssize_t j = 0; j < PATCH_SIZE; ++j)
 	{
 		for (ssize_t i = 0; i < PATCH_SIZE; ++i)
 		{
 			ssize_t gx = m_Patch->m_X * PATCH_SIZE + i;
 			ssize_t gz = m_Patch->m_Z * PATCH_SIZE + j;
 
 			CVector3D pos;
 			terrain->CalcPosition(gx, gz, pos);
 
 			// Move a bit towards the center of the tile
 			pos.X += TERRAIN_TILE_SIZE/4.f;
 			pos.Z += TERRAIN_TILE_SIZE/4.f;
 
 			float x, y;
 			camera.GetScreenCoordinates(pos, x, y);
 
 			textRenderer.PrintfAt(x, y, L"%d", m_Patch->m_MiniPatches[j][i].Priority);
 		}
 	}
 }
 
 //
 // Water build and rendering
 //
 
 // Build vertex buffer for water vertices over our patch
 void CPatchRData::BuildWater()
 {
 	PROFILE3("build water");
 
 	// Number of vertices in each direction in each patch
 	ENSURE(PATCH_SIZE % water_cell_size == 0);
 
 	m_VBWater.Reset();
 	m_VBWaterIndices.Reset();
 	m_VBWaterShore.Reset();
 	m_VBWaterIndicesShore.Reset();
 
 	m_WaterBounds.SetEmpty();
 
 	// We need to use this to access the water manager or we may not have the
 	// actual values but some compiled-in defaults
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*m_Simulation, SYSTEM_ENTITY);
 	if (!cmpWaterManager)
 		return;
 
 	// Build data for water
 	std::vector<SWaterVertex> water_vertex_data;
 	std::vector<GLushort> water_indices;
 	u16 water_index_map[PATCH_SIZE+1][PATCH_SIZE+1];
 	memset(water_index_map, 0xFF, sizeof(water_index_map));
 
 	// Build data for shore
 	std::vector<SWaterVertex> water_vertex_data_shore;
 	std::vector<GLushort> water_indices_shore;
 	u16 water_shore_index_map[PATCH_SIZE+1][PATCH_SIZE+1];
 	memset(water_shore_index_map, 0xFF, sizeof(water_shore_index_map));
 
 	const WaterManager& waterManager = g_Renderer.GetSceneRenderer().GetWaterManager();
 
 	CPatch* patch = m_Patch;
 	CTerrain* terrain = patch->m_Parent;
 
 	ssize_t mapSize = terrain->GetVerticesPerSide();
 
 	// Top-left coordinates of our patch.
 	ssize_t px = m_Patch->m_X * PATCH_SIZE;
 	ssize_t pz = m_Patch->m_Z * PATCH_SIZE;
 
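-	// To whoever implements different water heights, this is a TODO: water height)
+	// TODO: water height is sampled once at (0, 0) and used for the whole patch; revisit if different water heights are implemented.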
 	float waterHeight = cmpWaterManager->GetExactWaterLevel(0.0f,0.0f);
 
 	// The 4 points making a water tile.
 	int moves[4][2] = {
 		{0, 0},
 		{water_cell_size, 0},
 		{0, water_cell_size},
 		{water_cell_size, water_cell_size}
 	};
 	// Where to look for when checking for water for shore tiles.
 	int check[10][2] = {
 		{0, 0},
 		{water_cell_size, 0},
 		{water_cell_size*2, 0},
 		{0, water_cell_size},
 		{0, water_cell_size*2},
 		{water_cell_size, water_cell_size},
 		{water_cell_size*2, water_cell_size*2},
 		{-water_cell_size, 0},
 		{0, -water_cell_size},
 		{-water_cell_size, -water_cell_size}
 	};
 
 	// build vertices, uv, and shader varying
 	for (ssize_t z = 0; z < PATCH_SIZE; z += water_cell_size)
 	{
 		for (ssize_t x = 0; x < PATCH_SIZE; x += water_cell_size)
 		{
 			// Check that this tile is close to water
 			bool nearWater = false;
 			for (size_t test = 0; test < 10; ++test)
 				if (terrain->GetVertexGroundLevel(x + px + check[test][0], z + pz + check[test][1]) < waterHeight)
 					nearWater = true;
 			if (!nearWater)
 				continue;
 
 			// This is actually lying and I should call CcmpTerrain
 			/*if (!terrain->IsOnMap(x+x1, z+z1)
 			 && !terrain->IsOnMap(x+x1, z+z1 + water_cell_size)
 			 && !terrain->IsOnMap(x+x1 + water_cell_size, z+z1)
 			 && !terrain->IsOnMap(x+x1 + water_cell_size, z+z1 + water_cell_size))
 			 continue;*/
 
 			for (int i = 0; i < 4; ++i)
 			{
 				if (water_index_map[z+moves[i][1]][x+moves[i][0]] != 0xFFFF)
 					continue;
 
 				ssize_t xx = x + px + moves[i][0];
 				ssize_t zz = z + pz + moves[i][1];
 
 				SWaterVertex vertex;
 				terrain->CalcPosition(xx,zz, vertex.m_Position);
 				float depth = waterHeight - vertex.m_Position.Y;
 
 				vertex.m_Position.Y = waterHeight;
 
 				m_WaterBounds += vertex.m_Position;
 
 				vertex.m_WaterData = CVector2D(waterManager.m_WindStrength[xx + zz*mapSize], depth);
 
 				water_index_map[z+moves[i][1]][x+moves[i][0]] = static_cast<u16>(water_vertex_data.size());
 				water_vertex_data.push_back(vertex);
 			}
 			water_indices.push_back(water_index_map[z + moves[2][1]][x + moves[2][0]]);
 			water_indices.push_back(water_index_map[z + moves[0][1]][x + moves[0][0]]);
 			water_indices.push_back(water_index_map[z + moves[1][1]][x + moves[1][0]]);
 			water_indices.push_back(water_index_map[z + moves[1][1]][x + moves[1][0]]);
 			water_indices.push_back(water_index_map[z + moves[3][1]][x + moves[3][0]]);
 			water_indices.push_back(water_index_map[z + moves[2][1]][x + moves[2][0]]);
 
-			// Check id this tile is partly over land.
+			// Check if this tile is partly over land.
 			// If so add a square over the terrain. This is necessary to render waves that go on shore.
 			if (terrain->GetVertexGroundLevel(x+px, z+pz) < waterHeight &&
 				terrain->GetVertexGroundLevel(x+px + water_cell_size, z+pz) < waterHeight &&
 				terrain->GetVertexGroundLevel(x+px, z+pz+water_cell_size) < waterHeight &&
 				terrain->GetVertexGroundLevel(x+px + water_cell_size, z+pz+water_cell_size) < waterHeight)
 				continue;
 
 			for (int i = 0; i < 4; ++i)
 			{
 				if (water_shore_index_map[z+moves[i][1]][x+moves[i][0]] != 0xFFFF)
 					continue;
 				ssize_t xx = x + px + moves[i][0];
 				ssize_t zz = z + pz + moves[i][1];
 
 				SWaterVertex vertex;
 				terrain->CalcPosition(xx,zz, vertex.m_Position);
 
 				vertex.m_Position.Y += 0.02f;
 				m_WaterBounds += vertex.m_Position;
 
 				vertex.m_WaterData = CVector2D(0.0f, -5.0f);
 
 				water_shore_index_map[z+moves[i][1]][x+moves[i][0]] = static_cast<u16>(water_vertex_data_shore.size());
 				water_vertex_data_shore.push_back(vertex);
 			}
 			if (terrain->GetTriangulationDir(x + px, z + pz))
 			{
 				water_indices_shore.push_back(water_shore_index_map[z + moves[2][1]][x + moves[2][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[0][1]][x + moves[0][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[1][1]][x + moves[1][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[1][1]][x + moves[1][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[3][1]][x + moves[3][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[2][1]][x + moves[2][0]]);
 			}
 			else
 			{
 				water_indices_shore.push_back(water_shore_index_map[z + moves[3][1]][x + moves[3][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[2][1]][x + moves[2][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[0][1]][x + moves[0][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[3][1]][x + moves[3][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[0][1]][x + moves[0][0]]);
 				water_indices_shore.push_back(water_shore_index_map[z + moves[1][1]][x + moves[1][0]]);
 			}
 		}
 	}
 
 	// No vertex buffers if no data generated
 	if (!water_indices.empty())
 	{
 		m_VBWater = g_VBMan.AllocateChunk(
 			sizeof(SWaterVertex), water_vertex_data.size(),
 			Renderer::Backend::GL::CBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWater->m_Owner->UpdateChunkVertices(m_VBWater.Get(), &water_vertex_data[0]);
 
 		m_VBWaterIndices = g_VBMan.AllocateChunk(
 			sizeof(GLushort), water_indices.size(),
 			Renderer::Backend::GL::CBuffer::Type::INDEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWaterIndices->m_Owner->UpdateChunkVertices(m_VBWaterIndices.Get(), &water_indices[0]);
 	}
 
 	if (!water_indices_shore.empty())
 	{
 		m_VBWaterShore = g_VBMan.AllocateChunk(
 			sizeof(SWaterVertex), water_vertex_data_shore.size(),
 			Renderer::Backend::GL::CBuffer::Type::VERTEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWaterShore->m_Owner->UpdateChunkVertices(m_VBWaterShore.Get(), &water_vertex_data_shore[0]);
 
 		// Construct indices buffer
 		m_VBWaterIndicesShore = g_VBMan.AllocateChunk(
 			sizeof(GLushort), water_indices_shore.size(),
 			Renderer::Backend::GL::CBuffer::Type::INDEX, false,
 			nullptr, CVertexBufferManager::Group::WATER);
 		m_VBWaterIndicesShore->m_Owner->UpdateChunkVertices(m_VBWaterIndicesShore.Get(), &water_indices_shore[0]);
 	}
 }
 
 void CPatchRData::RenderWaterSurface(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader, const bool bindWaterData)
 {
 	ASSERT(m_UpdateFlags == 0);
 
 	if (!m_VBWater)
 		return;
 
+	m_VBWaterIndices->m_Owner->UploadIfNeeded(deviceCommandContext);
+
 	SWaterVertex* base = reinterpret_cast<SWaterVertex*>(m_VBWater->m_Owner->Bind(deviceCommandContext));
 
 	// Setup data pointers.
 	const GLsizei stride = sizeof(SWaterVertex);
 	shader->VertexPointer(3, GL_FLOAT, stride, &base[m_VBWater->m_Index].m_Position);
 	if (bindWaterData)
 		shader->VertexAttribPointer(str_a_waterInfo, 2, GL_FLOAT, false, stride, &base[m_VBWater->m_Index].m_WaterData);
 
 	shader->AssertPointersBound();
 
-	u8* indexBase = m_VBWaterIndices->m_Owner->Bind(deviceCommandContext);
-	glDrawElements(
-		GL_TRIANGLES, static_cast<GLsizei>(m_VBWaterIndices->m_Count),
-		GL_UNSIGNED_SHORT, indexBase + sizeof(u16)*(m_VBWaterIndices->m_Index));
+	deviceCommandContext->SetIndexBuffer(m_VBWaterIndices->m_Owner->GetBuffer());
+	deviceCommandContext->DrawIndexed(m_VBWaterIndices->m_Index, m_VBWaterIndices->m_Count, 0);
 
 	g_Renderer.m_Stats.m_DrawCalls++;
 	g_Renderer.m_Stats.m_WaterTris += m_VBWaterIndices->m_Count / 3;
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
 
 void CPatchRData::RenderWaterShore(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderProgramPtr& shader)
 {
 	ASSERT(m_UpdateFlags == 0);
 
 	if (!m_VBWaterShore)
 		return;
 
+	m_VBWaterIndicesShore->m_Owner->UploadIfNeeded(deviceCommandContext);
+
 	SWaterVertex* base = reinterpret_cast<SWaterVertex*>(m_VBWaterShore->m_Owner->Bind(deviceCommandContext));
 
 	const GLsizei stride = sizeof(SWaterVertex);
 	shader->VertexPointer(3, GL_FLOAT, stride, &base[m_VBWaterShore->m_Index].m_Position);
 	shader->VertexAttribPointer(str_a_waterInfo, 2, GL_FLOAT, false, stride, &base[m_VBWaterShore->m_Index].m_WaterData);
 
 	shader->AssertPointersBound();
 
-	u8* indexBase = m_VBWaterIndicesShore->m_Owner->Bind(deviceCommandContext);
-	glDrawElements(GL_TRIANGLES, static_cast<GLsizei>(m_VBWaterIndicesShore->m_Count),
-					GL_UNSIGNED_SHORT, indexBase + sizeof(u16)*(m_VBWaterIndicesShore->m_Index));
+	deviceCommandContext->SetIndexBuffer(m_VBWaterIndicesShore->m_Owner->GetBuffer());
+	deviceCommandContext->DrawIndexed(m_VBWaterIndicesShore->m_Index, m_VBWaterIndicesShore->m_Count, 0);
 
 	g_Renderer.m_Stats.m_DrawCalls++;
 	g_Renderer.m_Stats.m_WaterTris += m_VBWaterIndicesShore->m_Count / 3;
 
 	CVertexBuffer::Unbind(deviceCommandContext);
 }
Index: ps/trunk/source/renderer/PostprocManager.cpp
===================================================================
--- ps/trunk/source/renderer/PostprocManager.cpp	(revision 26524)
+++ ps/trunk/source/renderer/PostprocManager.cpp	(revision 26525)
@@ -1,768 +1,768 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/PostprocManager.h"
 
 #include "graphics/GameView.h"
 #include "graphics/LightEnv.h"
 #include "graphics/ShaderManager.h"
 #include "lib/bits.h"
 #include "lib/ogl.h"
 #include "maths/MathUtil.h"
 #include "ps/ConfigDB.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Filesystem.h"
 #include "ps/Game.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "tools/atlas/GameInterface/GameLoop.h"
 
 #if !CONFIG2_GLES
 
 CPostprocManager::CPostprocManager()
 	: m_IsInitialized(false), m_PostProcEffect(L"default"), m_WhichBuffer(true),
 	m_Sharpness(0.3f), m_UsingMultisampleBuffer(false), m_MultisampleCount(0)
 {
 }
 
 CPostprocManager::~CPostprocManager()
 {
 	Cleanup();
 }
 
 bool CPostprocManager::IsEnabled() const
 {
 	return g_RenderingOptions.GetPostProc() &&
 		g_VideoMode.GetBackend() != CVideoMode::Backend::GL_ARB;
 }
 
 void CPostprocManager::Cleanup()
 {
 	if (!m_IsInitialized) // Only cleanup if previously used
 		return;
 
 	m_CaptureFramebuffer.reset();
 
 	m_PingFramebuffer.reset();
 	m_PongFramebuffer.reset();
 
 	m_ColorTex1.reset();
 	m_ColorTex2.reset();
 	m_DepthTex.reset();
 
 	for (BlurScale& scale : m_BlurScales)
 	{
 		for (BlurScale::Step& step : scale.steps)
 		{
 			step.framebuffer.reset();
 			step.texture.reset();
 		}
 	}
 }
 
 void CPostprocManager::Initialize()
 {
 	if (m_IsInitialized)
 		return;
 
 	const uint32_t maxSamples = g_VideoMode.GetBackendDevice()->GetCapabilities().maxSampleCount;
 	const uint32_t possibleSampleCounts[] = {2, 4, 8, 16};
 	std::copy_if(
 		std::begin(possibleSampleCounts), std::end(possibleSampleCounts),
 		std::back_inserter(m_AllowedSampleCounts),
 		[maxSamples](const uint32_t sampleCount) { return sampleCount <= maxSamples; } );
 
 	// The screen size starts out correct and then must be updated with Resize()
 	m_Width = g_Renderer.GetWidth();
 	m_Height = g_Renderer.GetHeight();
 
 	RecreateBuffers();
 	m_IsInitialized = true;
 
 	// Once we have initialised the buffers, we can update the techniques.
 	UpdateAntiAliasingTechnique();
 	UpdateSharpeningTechnique();
 	UpdateSharpnessFactor();
 
 	// This might happen after the map is loaded and the effect chosen
 	SetPostEffect(m_PostProcEffect);
 }
 
 void CPostprocManager::Resize()
 {
 	m_Width = g_Renderer.GetWidth();
 	m_Height = g_Renderer.GetHeight();
 
-	// If the buffers were intialized, recreate them to the new size.
+	// If the buffers were initialized, recreate them to the new size.
 	if (m_IsInitialized)
 		RecreateBuffers();
 }
 
 void CPostprocManager::RecreateBuffers()
 {
 	Cleanup();
 
 	Renderer::Backend::GL::CDevice* backendDevice = g_VideoMode.GetBackendDevice();
 
 	#define GEN_BUFFER_RGBA(name, w, h) \
 		name = backendDevice->CreateTexture2D( \
 			"PostProc" #name, Renderer::Backend::Format::R8G8B8A8, w, h, \
 			Renderer::Backend::Sampler::MakeDefaultSampler( \
 				Renderer::Backend::Sampler::Filter::LINEAR, \
 				Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
 
 	// Two fullscreen ping-pong textures.
 	GEN_BUFFER_RGBA(m_ColorTex1, m_Width, m_Height);
 	GEN_BUFFER_RGBA(m_ColorTex2, m_Width, m_Height);
 
 	// Textures for several blur sizes. It would be possible to reuse
 	// m_BlurTex2b, thus avoiding the need for m_BlurTex4b and m_BlurTex8b, though given
 	// that these are fairly small it's probably not worth complicating the coordinates passed
 	// to the blur helper functions.
 	uint32_t width = m_Width / 2, height = m_Height / 2;
 	for (BlurScale& scale : m_BlurScales)
 	{
 		for (BlurScale::Step& step : scale.steps)
 		{
 			GEN_BUFFER_RGBA(step.texture, width, height);
 			step.framebuffer = backendDevice->CreateFramebuffer("BlurScaleSteoFramebuffer",
 				step.texture.get(), nullptr);
 		}
 		width /= 2;
 		height /= 2;
 	}
 
 	#undef GEN_BUFFER_RGBA
 
 	// Allocate the Depth/Stencil texture.
 	m_DepthTex = backendDevice->CreateTexture2D("PostPRocDepthTexture",
 		Renderer::Backend::Format::D24_S8, m_Width, m_Height,
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
 
 	// Set up the framebuffers with some initial textures.
 	m_CaptureFramebuffer = backendDevice->CreateFramebuffer("PostprocCaptureFramebuffer",
 		m_ColorTex1.get(), m_DepthTex.get(),
 		g_VideoMode.GetBackendDevice()->GetCurrentBackbuffer()->GetClearColor());
 
 	m_PingFramebuffer = backendDevice->CreateFramebuffer("PostprocPingFramebuffer",
 		m_ColorTex1.get(), nullptr);
 	m_PongFramebuffer = backendDevice->CreateFramebuffer("PostprocPongFramebuffer",
 		m_ColorTex2.get(), nullptr);
 
 	if (!m_CaptureFramebuffer || !m_PingFramebuffer || !m_PongFramebuffer)
 	{
 		LOGWARNING("Failed to create postproc framebuffers");
 		g_RenderingOptions.SetPostProc(false);
 	}
 
 	if (m_UsingMultisampleBuffer)
 	{
 		DestroyMultisampleBuffer();
 		CreateMultisampleBuffer();
 	}
 }
 
 
 void CPostprocManager::ApplyBlurDownscale2x(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	Renderer::Backend::GL::CFramebuffer* framebuffer,
 	Renderer::Backend::GL::CTexture* inTex, int inWidth, int inHeight)
 {
 	deviceCommandContext->SetFramebuffer(framebuffer);
 
 	// Get bloom shader with instructions to simply copy texels.
 	CShaderDefines defines;
 	defines.Add(str_BLOOM_NOP, str_1);
 	CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(str_bloom, defines);
 
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		tech->GetGraphicsPipelineStateDesc());
 	const CShaderProgramPtr& shader = tech->GetShader();
 
 	shader->BindTexture(str_renderedTex, inTex);
 
 	const SViewPort oldVp = g_Renderer.GetViewport();
 	const SViewPort vp = { 0, 0, inWidth / 2, inHeight / 2 };
 	g_Renderer.SetViewport(vp);
 
 	float quadVerts[] =
 	{
 		1.0f, 1.0f,
 		-1.0f, 1.0f,
 		-1.0f, -1.0f,
 
 		-1.0f, -1.0f,
 		1.0f, -1.0f,
 		1.0f, 1.0f
 	};
 	float quadTex[] =
 	{
 		1.0f, 1.0f,
 		0.0f, 1.0f,
 		0.0f, 0.0f,
 
 		0.0f, 0.0f,
 		1.0f, 0.0f,
 		1.0f, 1.0f
 	};
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadTex);
 	shader->VertexPointer(2, GL_FLOAT, 0, quadVerts);
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 
 	g_Renderer.SetViewport(oldVp);
 
 	tech->EndPass();
 }
 
 void CPostprocManager::ApplyBlurGauss(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	Renderer::Backend::GL::CTexture* inTex,
 	Renderer::Backend::GL::CTexture* tempTex,
 	Renderer::Backend::GL::CFramebuffer* tempFramebuffer,
 	Renderer::Backend::GL::CFramebuffer* outFramebuffer,
 	int inWidth, int inHeight)
 {
 	deviceCommandContext->SetFramebuffer(tempFramebuffer);
 
 	// Get bloom shader, for a horizontal Gaussian blur pass.
 	CShaderDefines defines2;
 	defines2.Add(str_BLOOM_PASS_H, str_1);
 	CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(str_bloom, defines2);
 
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		tech->GetGraphicsPipelineStateDesc());
 	CShaderProgramPtr shader = tech->GetShader();
 	shader->BindTexture(str_renderedTex, inTex);
 	shader->Uniform(str_texSize, inWidth, inHeight, 0.0f, 0.0f);
 
 	const SViewPort oldVp = g_Renderer.GetViewport();
 	const SViewPort vp = { 0, 0, inWidth, inHeight };
 	g_Renderer.SetViewport(vp);
 
 	float quadVerts[] =
 	{
 		1.0f, 1.0f,
 		-1.0f, 1.0f,
 		-1.0f, -1.0f,
 
 		-1.0f, -1.0f,
 		1.0f, -1.0f,
 		1.0f, 1.0f
 	};
 	float quadTex[] =
 	{
 		1.0f, 1.0f,
 		0.0f, 1.0f,
 		0.0f, 0.0f,
 
 		0.0f, 0.0f,
 		1.0f, 0.0f,
 		1.0f, 1.0f
 	};
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadTex);
 	shader->VertexPointer(2, GL_FLOAT, 0, quadVerts);
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 
 	g_Renderer.SetViewport(oldVp);
 
 	tech->EndPass();
 
 	deviceCommandContext->SetFramebuffer(outFramebuffer);
 
 	// Get bloom shader, for a vertical Gaussian blur pass.
 	CShaderDefines defines3;
 	defines3.Add(str_BLOOM_PASS_V, str_1);
 	tech = g_Renderer.GetShaderManager().LoadEffect(str_bloom, defines3);
 
 	tech->BeginPass();
-	shader = tech->GetShader();
-
+	deviceCommandContext->SetGraphicsPipelineState(tech->GetGraphicsPipelineStateDesc());
+	shader = tech->GetShader();
 	// Our input texture to the shader is the output of the horizontal pass.
 	shader->BindTexture(str_renderedTex, tempTex);
 	shader->Uniform(str_texSize, inWidth, inHeight, 0.0f, 0.0f);
 
 	g_Renderer.SetViewport(vp);
 
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadTex);
 	shader->VertexPointer(2, GL_FLOAT, 0, quadVerts);
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 
 	g_Renderer.SetViewport(oldVp);
 
 	tech->EndPass();
 }
 
 void CPostprocManager::ApplyBlur(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	uint32_t width = m_Width, height = m_Height;
 	Renderer::Backend::GL::CTexture* previousTexture =
 		(m_WhichBuffer ? m_ColorTex1 : m_ColorTex2).get();
 
 	for (BlurScale& scale : m_BlurScales)
 	{
 		ApplyBlurDownscale2x(deviceCommandContext, scale.steps[0].framebuffer.get(), previousTexture, width, height);
 		width /= 2;
 		height /= 2;
 		ApplyBlurGauss(deviceCommandContext, scale.steps[0].texture.get(),
 			scale.steps[1].texture.get(), scale.steps[1].framebuffer.get(),
 			scale.steps[0].framebuffer.get(), width, height);
 	}
 }
 
 
 void CPostprocManager::CaptureRenderOutput(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	ENSURE(m_IsInitialized);
 
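-	// Leaves m_PingFbo selected for rendering; m_WhichBuffer stays true at this point.
+	// Selects the capture framebuffer (or the multisample one when enabled) for rendering; m_WhichBuffer is set to true below.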
 
 	if (m_UsingMultisampleBuffer)
 		deviceCommandContext->SetFramebuffer(m_MultisampleFramebuffer.get());
 	else
 		deviceCommandContext->SetFramebuffer(m_CaptureFramebuffer.get());
 
 	m_WhichBuffer = true;
 }
 
 
 void CPostprocManager::ReleaseRenderOutput(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	ENSURE(m_IsInitialized);
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Copy postproc to backbuffer");
 
 	// We blit to the backbuffer from the previous active buffer.
 	deviceCommandContext->BlitFramebuffer(
 		deviceCommandContext->GetDevice()->GetCurrentBackbuffer(),
 		(m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get());
 
 	deviceCommandContext->SetFramebuffer(
 		deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 }
 
 // Runs one pass of shaderTech as a full-screen draw.
 // Both the colour texture bound as input and the framebuffer rendered into
 // are selected by m_WhichBuffer, which is flipped at the end of the call.
 // NOTE(review): presumably the flip makes the next pass read this pass's
 // output; the texture/framebuffer pairing is set up outside this function.
 void CPostprocManager::ApplyEffect(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderTechniquePtr& shaderTech, int pass)
 {
 	// select the other FBO for rendering
 	deviceCommandContext->SetFramebuffer(
 		(m_WhichBuffer ? m_PongFramebuffer : m_PingFramebuffer).get());
 
 	shaderTech->BeginPass(pass);
 	deviceCommandContext->SetGraphicsPipelineState(
 		shaderTech->GetGraphicsPipelineStateDesc(pass));
 	const CShaderProgramPtr& shader = shaderTech->GetShader(pass);
 
 	// Use the textures from the current FBO as input to the shader.
 	// We also bind a bunch of other textures and parameters, but since
 	// this only happens once per frame the overhead is negligible.
 	if (m_WhichBuffer)
 		shader->BindTexture(str_renderedTex, m_ColorTex1.get());
 	else
 		shader->BindTexture(str_renderedTex, m_ColorTex2.get());
 
 	shader->BindTexture(str_depthTex, m_DepthTex.get());
 
 	// First step texture of each of the three blur scales.
 	shader->BindTexture(str_blurTex2, m_BlurScales[0].steps[0].texture.get());
 	shader->BindTexture(str_blurTex4, m_BlurScales[1].steps[0].texture.get());
 	shader->BindTexture(str_blurTex8, m_BlurScales[2].steps[0].texture.get());
 
 	shader->Uniform(str_width, m_Width);
 	shader->Uniform(str_height, m_Height);
 	shader->Uniform(str_zNear, m_NearPlane);
 	shader->Uniform(str_zFar, m_FarPlane);
 
 	shader->Uniform(str_sharpness, m_Sharpness);
 
 	// Note: the "hdr" uniform is fed from the light environment's contrast.
 	shader->Uniform(str_brightness, g_LightEnv.m_Brightness);
 	shader->Uniform(str_hdr, g_LightEnv.m_Contrast);
 	shader->Uniform(str_saturation, g_LightEnv.m_Saturation);
 	shader->Uniform(str_bloom, g_LightEnv.m_Bloom);
 
 	// Two triangles (six vertices) spanning [-1, 1] on both axes, with
 	// matching [0, 1] texture coordinates.
 	float quadVerts[] =
 	{
 		1.0f, 1.0f,
 		-1.0f, 1.0f,
 		-1.0f, -1.0f,
 
 		-1.0f, -1.0f,
 		1.0f, -1.0f,
 		1.0f, 1.0f
 	};
 	float quadTex[] =
 	{
 		1.0f, 1.0f,
 		0.0f, 1.0f,
 		0.0f, 0.0f,
 
 		0.0f, 0.0f,
 		1.0f, 0.0f,
 		1.0f, 1.0f
 	};
 	shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, quadTex);
 	shader->VertexPointer(2, GL_FLOAT, 0, quadVerts);
 	shader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 
 	shaderTech->EndPass(pass);
 
 	// Swap the roles of the two buffers for the next call.
 	m_WhichBuffer = !m_WhichBuffer;
 }
 
 // Runs the post-processing chain in a fixed order: the selected effect
 // (preceded by the blur passes), then anti-aliasing, then sharpening.
 // Anti-aliasing and sharpening are never run on the GL_ARB backend.
 // Requires the manager to be initialized.
 void CPostprocManager::ApplyPostproc(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	ENSURE(m_IsInitialized);
 
 	const bool runEffect = m_PostProcEffect != L"default";
 	const bool backendIsARB = g_VideoMode.GetBackend() == CVideoMode::Backend::GL_ARB;
 	const bool runAA = m_AATech && !backendIsARB;
 	const bool runSharp = m_SharpTech && !backendIsARB;
 
 	// Nothing to do with the default effect, no AA and no sharpening.
 	if (!(runEffect || runAA || runSharp))
 		return;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render postproc");
 
 	// Applies every pass of a technique, in pass order.
 	const auto runAllPasses = [this, deviceCommandContext](const CShaderTechniquePtr& tech)
 	{
 		for (int pass = 0; pass < tech->GetNumPasses(); ++pass)
 			ApplyEffect(deviceCommandContext, tech, pass);
 	};
 
 	if (runEffect)
 	{
 		// The blur textures are rendered exactly once, before any effect pass.
 		// (This may need to change depending on future usage, however that will have a fps hit)
 		ApplyBlur(deviceCommandContext);
 		runAllPasses(m_PostProcTech);
 	}
 
 	if (runAA)
 		runAllPasses(m_AATech);
 
 	if (runSharp)
 		runAllPasses(m_SharpTech);
 }
 
 
 // Generate the list of available effect-sets.
 // Returns the base name of every ".xml" file found in the postproc effects
 // folder, plus the built-in "default" (null) effect, sorted ascending.
 // A failure to enumerate the folder is logged and the result then contains
 // only whatever path names were collected (typically just "default").
 std::vector<CStrW> CPostprocManager::GetPostEffects()
 {
 	std::vector<CStrW> effects;
 
 	const VfsPath folder(L"shaders/effects/postproc/");
 
 	VfsPaths pathnames;
 	if (vfs::GetPathnames(g_VFS, folder, 0, pathnames) < 0)
 		LOGERROR("Error finding Post effects in '%s'", folder.string8());
 
 	for (const VfsPath& path : pathnames)
 		if (path.Extension() == L".xml")
 			effects.push_back(path.Basename().string());
 
 	// Add the default "null" effect to the list.
 	effects.push_back(L"default");
 
 	// Qualified call: do not rely on argument-dependent lookup through the
 	// vector's iterator type to find the standard algorithm.
 	std::sort(effects.begin(), effects.end());
 
 	return effects;
 }
 
 void CPostprocManager::SetPostEffect(const CStrW& name)
 {
 	if (m_IsInitialized)
 	{
 		if (name != L"default")
 		{
 			CStrW n = L"postproc/" + name;
 			m_PostProcTech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern(n.ToUTF8()));
 		}
 	}
 
 	m_PostProcEffect = name;
 }
 
 // Re-reads the "antialiasing" config value and switches technique if it
 // changed. Does nothing on the GL_ARB backend or before initialization.
 //  - "fxaa" loads the fxaa shader technique.
 //  - "msaa<N>" enables the multisample buffer with N samples; N falls back
 //    to 4 (with a warning) when it is not one of m_AllowedSampleCounts.
 //  - any other value leaves anti-aliasing disabled.
 // Any previously active technique or multisample buffer is released first.
 void CPostprocManager::UpdateAntiAliasingTechnique()
 {
 	if (g_VideoMode.GetBackend() == CVideoMode::Backend::GL_ARB || !m_IsInitialized)
 		return;
 
 	CStr newAAName;
 	CFG_GET_VAL("antialiasing", newAAName);
 	if (m_AAName == newAAName)
 		return;
 	m_AAName = newAAName;
 	m_AATech.reset();
 
 	if (m_UsingMultisampleBuffer)
 	{
 		// The flag has to be cleared first: DestroyMultisampleBuffer refuses
 		// to release a buffer that is still marked as in use.
 		m_UsingMultisampleBuffer = false;
 		DestroyMultisampleBuffer();
 	}
 
 	// We have to hardcode names in the engine, because anti-aliasing
 	// techniques strongly depend on the graphics pipeline.
 	// We might use enums in future though.
 	const CStr msaaPrefix = "msaa";
 	if (m_AAName == "fxaa")
 	{
 		m_AATech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern("fxaa"));
 	}
 	else if (m_AAName.size() > msaaPrefix.size() && m_AAName.substr(0, msaaPrefix.size()) == msaaPrefix)
 	{
 		// We don't want to enable MSAA in Atlas, because it uses wxWidgets and its canvas.
 		if (g_AtlasGameLoop && g_AtlasGameLoop->running)
 			return;
 		// MSAA is unusable when the device lacks multisampling or when there
 		// is no sample count we are allowed to request.
 		if (!g_VideoMode.GetBackendDevice()->GetCapabilities().multisampling || m_AllowedSampleCounts.empty())
 		{
 			LOGWARNING("MSAA is unsupported.");
 			return;
 		}
 		// Parse the sample count that follows the "msaa" prefix.
 		std::stringstream ss(m_AAName.substr(msaaPrefix.size()));
 		ss >> m_MultisampleCount;
 		if (std::find(std::begin(m_AllowedSampleCounts), std::end(m_AllowedSampleCounts), m_MultisampleCount) ==
 		        std::end(m_AllowedSampleCounts))
 		{
 			m_MultisampleCount = 4;
 			LOGWARNING("Wrong MSAA sample count: %s.", m_AAName.EscapeToPrintableASCII().c_str());
 		}
 		m_UsingMultisampleBuffer = true;
 		CreateMultisampleBuffer();
 	}
 }
 
 void CPostprocManager::UpdateSharpeningTechnique()
 {
 	if (g_VideoMode.GetBackend() == CVideoMode::Backend::GL_ARB || !m_IsInitialized)
 		return;
 
 	CStr newSharpName;
 	CFG_GET_VAL("sharpening", newSharpName);
 	if (m_SharpName == newSharpName)
 		return;
 	m_SharpName = newSharpName;
 	m_SharpTech.reset();
 
 	if (m_SharpName == "cas")
 	{
 		m_SharpTech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern(m_SharpName));
 	}
 }
 
 // Refreshes m_Sharpness from the "sharpness" config value. The value is
 // passed to shaders as the "sharpness" uniform by ApplyEffect.
 void CPostprocManager::UpdateSharpnessFactor()
 {
 	CFG_GET_VAL("sharpness", m_Sharpness);
 }
 
 // Stores the near and far clip plane distances; ApplyEffect passes them to
 // shaders as the zNear and zFar uniforms.
 void CPostprocManager::SetDepthBufferClipPlanes(float nearPlane, float farPlane)
 {
 	m_NearPlane = nearPlane;
 	m_FarPlane = farPlane;
 }
 
 // Creates the multisampled colour and depth/stencil textures (m_Width x
 // m_Height, m_MultisampleCount samples) and the framebuffer that uses them.
 // If the framebuffer cannot be created, an error is logged, multisampling
 // is switched off and the textures are released again.
 void CPostprocManager::CreateMultisampleBuffer()
 {
 	Renderer::Backend::GL::CDevice* backendDevice = g_VideoMode.GetBackendDevice();
 
 	// Both attachments share the same sampler description.
 	const auto multisampleSampler =
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE);
 
 	// Colour attachment.
 	m_MultisampleColorTex = backendDevice->CreateTexture("PostProcColorMS",
 		Renderer::Backend::GL::CTexture::Type::TEXTURE_2D_MULTISAMPLE,
 		Renderer::Backend::Format::R8G8B8A8, m_Width, m_Height,
 		multisampleSampler, 1, m_MultisampleCount);
 
 	// Depth/stencil attachment.
 	m_MultisampleDepthTex = backendDevice->CreateTexture("PostProcDepthMS",
 		Renderer::Backend::GL::CTexture::Type::TEXTURE_2D_MULTISAMPLE,
 		Renderer::Backend::Format::D24_S8, m_Width, m_Height,
 		multisampleSampler, 1, m_MultisampleCount);
 
 	// The framebuffer reuses the clear colour of the current backbuffer.
 	m_MultisampleFramebuffer = backendDevice->CreateFramebuffer("PostprocMultisampleFramebuffer",
 		m_MultisampleColorTex.get(), m_MultisampleDepthTex.get(),
 		backendDevice->GetCurrentBackbuffer()->GetClearColor());
 
 	if (m_MultisampleFramebuffer)
 		return;
 
 	LOGERROR("Failed to create postproc multisample framebuffer");
 	// Clear the flag before destroying: the destroy call is a no-op while
 	// the buffer is still marked as in use.
 	m_UsingMultisampleBuffer = false;
 	DestroyMultisampleBuffer();
 }
 
 // Releases the multisample framebuffer and its two textures, but only when
 // the buffer is not marked as in use: callers clear
 // m_UsingMultisampleBuffer before calling this.
 void CPostprocManager::DestroyMultisampleBuffer()
 {
 	if (!m_UsingMultisampleBuffer)
 	{
 		m_MultisampleFramebuffer.reset();
 		m_MultisampleColorTex.reset();
 		m_MultisampleDepthTex.reset();
 	}
 }
 
 // Returns whether the multisample buffer is currently marked as in use.
 bool CPostprocManager::IsMultisampleEnabled() const
 {
 	return m_UsingMultisampleBuffer;
 }
 
 // When multisampling is in use, blits between the ping framebuffer and the
 // multisample framebuffer and then makes the ping framebuffer current.
 // NOTE(review): the argument order suggests (destination, source), i.e. the
 // multisample contents are resolved into the ping framebuffer - confirm
 // against BlitFramebuffer's declaration.
 void CPostprocManager::ResolveMultisampleFramebuffer(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (m_UsingMultisampleBuffer)
 	{
 		GPU_SCOPED_LABEL(deviceCommandContext, "Resolve postproc multisample");
 
 		Renderer::Backend::GL::CFramebuffer* pingFramebuffer = m_PingFramebuffer.get();
 		deviceCommandContext->BlitFramebuffer(
 			pingFramebuffer, m_MultisampleFramebuffer.get());
 		deviceCommandContext->SetFramebuffer(pingFramebuffer);
 	}
 }
 
 #else
 
 #warning TODO: implement PostprocManager for GLES
 
 // Stub implementations for the build configuration in which
 // post-processing is not implemented: every function below does nothing,
 // and the query functions report the feature as disabled or empty.
 
 // No-op stub.
 void ApplyBlurDownscale2x(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext),
 	Renderer::Backend::GL::CFramebuffer* UNUSED(framebuffer),
 	Renderer::Backend::GL::CTexture* UNUSED(inTex),
 	int UNUSED(inWidth), int UNUSED(inHeight))
 {
 }
 
 // No-op stub.
 void CPostprocManager::ApplyBlurGauss(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext),
 	Renderer::Backend::GL::CTexture* UNUSED(inTex),
 	Renderer::Backend::GL::CTexture* UNUSED(tempTex),
 	Renderer::Backend::GL::CFramebuffer* UNUSED(tempFramebuffer),
 	Renderer::Backend::GL::CFramebuffer* UNUSED(outFramebuffer),
 	int UNUSED(inWidth), int UNUSED(inHeight))
 {
 }
 
 // No-op stub.
 void CPostprocManager::ApplyEffect(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext),
 	const CShaderTechniquePtr& UNUSED(shaderTech), int UNUSED(pass))
 {
 }
 
 CPostprocManager::CPostprocManager()
 {
 }
 
 CPostprocManager::~CPostprocManager()
 {
 }
 
 // Post-processing is never enabled in this configuration.
 bool CPostprocManager::IsEnabled() const
 {
 	return false;
 }
 
 void CPostprocManager::Initialize()
 {
 }
 
 void CPostprocManager::Resize()
 {
 }
 
 void CPostprocManager::Cleanup()
 {
 }
 
 void CPostprocManager::RecreateBuffers()
 {
 }
 
 // No effects are available in this configuration: returns an empty list.
 std::vector<CStrW> CPostprocManager::GetPostEffects()
 {
 	return std::vector<CStrW>();
 }
 
 void CPostprocManager::SetPostEffect(const CStrW& UNUSED(name))
 {
 }
 
 void CPostprocManager::SetDepthBufferClipPlanes(float UNUSED(nearPlane), float UNUSED(farPlane))
 {
 }
 
 void CPostprocManager::UpdateAntiAliasingTechnique()
 {
 }
 
 void CPostprocManager::UpdateSharpeningTechnique()
 {
 }
 
 void CPostprocManager::UpdateSharpnessFactor()
 {
 }
 
 void CPostprocManager::CaptureRenderOutput(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext))
 {
 }
 
 void CPostprocManager::ApplyPostproc(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext))
 {
 }
 
 void CPostprocManager::ReleaseRenderOutput(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext))
 {
 }
 
 void CPostprocManager::CreateMultisampleBuffer()
 {
 }
 
 void CPostprocManager::DestroyMultisampleBuffer()
 {
 }
 
 // Multisampling is never enabled in this configuration.
 bool CPostprocManager::IsMultisampleEnabled() const
 {
 	return false;
 }
 
 void CPostprocManager::ResolveMultisampleFramebuffer(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext))
 {
 }
 
 #endif
Index: ps/trunk/source/renderer/ShadowMap.cpp
===================================================================
--- ps/trunk/source/renderer/ShadowMap.cpp	(revision 26524)
+++ ps/trunk/source/renderer/ShadowMap.cpp	(revision 26525)
@@ -1,772 +1,772 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "ShadowMap.h"
 
 #include "graphics/Camera.h"
 #include "graphics/LightEnv.h"
 #include "graphics/ShaderManager.h"
 #include "gui/GUIMatrix.h"
 #include "lib/bits.h"
 #include "lib/ogl.h"
 #include "maths/BoundingBoxAligned.h"
 #include "maths/Brush.h"
 #include "maths/Frustum.h"
 #include "maths/MathUtil.h"
 #include "maths/Matrix3D.h"
 #include "ps/CLogger.h"
 #include "ps/ConfigDB.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Profile.h"
 #include "ps/VideoMode.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/backend/gl/Texture.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/SceneRenderer.h"
 
 #include <array>
 
 namespace
 {
 
 // Upper bound for the configurable cascade count; also the size of the
 // per-cascade storage in ShadowMapInternals.
 constexpr int MAX_CASCADE_COUNT = 4;
 
 // Values used by SetupFrame before the "shadowscutoffdistance" and
 // "shadowscascadedistanceratio" config entries are read.
 constexpr float DEFAULT_SHADOWS_CUTOFF_DISTANCE = 300.0f;
 constexpr float DEFAULT_CASCADE_DISTANCE_RATIO = 1.7f;
 
 } // anonymous namespace
 
 /**
  * Struct ShadowMapInternals: Internal data for the ShadowMap implementation.
  * Holds the GPU resources (framebuffer and textures), the light-space
  * transforms computed per frame and the per-cascade state.
  */
 struct ShadowMapInternals
 {
 	// Framebuffer rendered into while drawing shadow casters, and the depth
 	// texture attached to it (both created by CreateTexture).
 	std::unique_ptr<Renderer::Backend::GL::CFramebuffer> Framebuffer;
 	std::unique_ptr<Renderer::Backend::GL::CTexture> Texture;
 
 	// bit depth for the depth texture
 	int DepthTextureBits;
 	// width, height of shadow map
 	int Width, Height;
 	// Shadow map quality (-1 - Low, 0 - Medium, 1 - High, 2 - Very High)
 	int QualityLevel;
 	// used width, height of shadow map
 	int EffectiveWidth, EffectiveHeight;
 
 	// Transform world space into light space; calculated on SetupFrame
 	CMatrix3D LightTransform;
 
 	// transform light space into world space
 	CMatrix3D InvLightTransform;
 	// Light-space bounds accumulated by AddShadowReceiverBound.
 	CBoundingBoxAligned ShadowReceiverBound;
 
 	// Number of cascades in use (1..MAX_CASCADE_COUNT), see UpdateCascadesParameters.
 	int CascadeCount;
 	// Ratio between the distances of two consecutive cascades.
 	float CascadeDistanceRatio;
 	// Distance assigned to the last cascade.
 	float ShadowsCutoffDistance;
 	// Value of the "shadowscovermap" config option.
 	bool ShadowsCoverMap;
 
 	struct Cascade
 	{
 		// transform light space into projected light space
 		// in projected light space, the shadowbound box occupies the [-1..1] cube
 		// calculated on BeginRender, after the final shadow bounds are known
 		CMatrix3D LightProjection;
 		// Far distance covered by this cascade.
 		float Distance;
 		// Light-space bounding box of the camera frustum slice of this cascade.
 		CBoundingBoxAligned FrustumBBAA;
 		// Light-space box around the sphere enclosing the frustum slice.
 		CBoundingBoxAligned ConvexBounds;
 		// Bounds actually used to build the light projection.
 		CBoundingBoxAligned ShadowRenderBound;
 		// Bounding box of shadowed objects in the light space.
 		CBoundingBoxAligned ShadowCasterBound;
 		// Transform world space into texture space of the shadow map;
 		// calculated on BeginRender, after the final shadow bounds are known
 		CMatrix3D TextureMatrix;
 		// View port of the shadow texture where the cascade should be rendered.
 		SViewPort ViewPort;
 	};
 	std::array<Cascade, MAX_CASCADE_COUNT> Cascades;
 
 	// Camera transformed into light space
 	CCamera LightspaceCamera;
 
 	// Some drivers (at least some Intel Mesa ones) appear to handle alpha testing
 	// incorrectly when the FBO has only a depth attachment.
 	// When m_ShadowAlphaFix is true, we use DummyTexture to store a useless
 	// alpha texture which is attached to the FBO as a workaround.
 	std::unique_ptr<Renderer::Backend::GL::CTexture> DummyTexture;
 
 	// Copy of renderer's standard view camera, saved between
 	// BeginRender and EndRender while we replace it with the shadow camera
 	CCamera SavedViewCamera;
 
 	void CalculateShadowMatrices(const int cascade);
 	void CreateTexture();
 	void UpdateCascadesParameters();
 };
 
 // Reloads the cascade settings from the configuration.
 // The cascade count comes from "shadowscascadecount" and is forced back to 1
 // when it lies outside [1, MAX_CASCADE_COUNT] or when the GL_ARB backend is
 // active. ShadowsCoverMap comes from "shadowscovermap" and defaults to false.
 void ShadowMapInternals::UpdateCascadesParameters()
 {
 	CascadeCount = 1;
 	CFG_GET_VAL("shadowscascadecount", CascadeCount);
 
 	const bool countInRange = CascadeCount >= 1 && CascadeCount <= MAX_CASCADE_COUNT;
 	const bool backendIsARB = g_VideoMode.GetBackend() == CVideoMode::Backend::GL_ARB;
 	if (!countInRange || backendIsARB)
 		CascadeCount = 1;
 
 	ShadowsCoverMap = false;
 	CFG_GET_VAL("shadowscovermap", ShadowsCoverMap);
 }
 
 // Computes two light-space bounds for the slice of the camera frustum
 // between nearPlane and farPlane:
 //  - frustumBBAA: the axis-aligned box around the eight slice corners after
 //    transforming them with lightTransform;
 //  - bbaa: a box around the sphere circumscribing the slice (expanded by a
 //    small inset), which does not change size when the camera rotates.
 void CalculateBoundsForCascade(
 	const CCamera& camera, const CMatrix3D& lightTransform,
 	const float nearPlane, const float farPlane, CBoundingBoxAligned* bbaa,
 	CBoundingBoxAligned* frustumBBAA)
 {
 	frustumBBAA->SetEmpty();
 
 	// We need to calculate a circumscribed sphere for the camera to
 	// create a rotation stable bounding box.
 	const CVector3D cameraIn = camera.m_Orientation.GetIn();
 	const CVector3D cameraTranslation = camera.m_Orientation.GetTranslation();
 	const CVector3D centerNear = cameraTranslation + cameraIn * nearPlane;
 	const CVector3D centerDist = cameraTranslation + cameraIn * farPlane;
 
 	// We can solve 3D problem in 2D space, because the frustum is
 	// symmetric by 2 planes. That means we can use only one corner
 	// to find a circumscribed sphere.
 	CCamera::Quad corners;
 
 	// Near quad: move the corners to world space, then add them to the
 	// light-space frustum bounds.
 	camera.GetViewQuad(nearPlane, corners);
 	for (CVector3D& corner : corners)
 		corner = camera.GetOrientation().Transform(corner);
 	const CVector3D cornerNear = corners[0];
 	for (const CVector3D& corner : corners)
 		*frustumBBAA += lightTransform.Transform(corner);
 
 	// Far quad: same treatment.
 	camera.GetViewQuad(farPlane, corners);
 	for (CVector3D& corner : corners)
 		corner = camera.GetOrientation().Transform(corner);
 	const CVector3D cornerDist = corners[0];
 	for (const CVector3D& corner : corners)
 		*frustumBBAA += lightTransform.Transform(corner);
 
 	// We solve 2D case for the right trapezoid.
 	// distanceToCenter is the distance from the near plane centre, along the
 	// view direction, at which a point is equally far from the near and the
 	// far corner.
 	const float firstBase = (cornerNear - centerNear).Length();
 	const float secondBase = (cornerDist - centerDist).Length();
 	const float height = (centerDist - centerNear).Length();
 	const float distanceToCenter =
 		(height * height + secondBase * secondBase - firstBase * firstBase) * 0.5f / height;
 
 	CVector3D position = cameraTranslation + cameraIn * (nearPlane + distanceToCenter);
 	const float radius = (cornerNear - position).Length();
 
 	// We need to convert the bounding box to the light space.
 	// NOTE(review): this uses Rotate rather than Transform, presumably
 	// dropping the translation part of lightTransform - confirm.
 	position = lightTransform.Rotate(position);
 
 	const float insets = 0.2f;
 	*bbaa = CBoundingBoxAligned(position, position);
 	bbaa->Expand(radius);
 	bbaa->Expand(insets);
 }
 
 // Allocates the internal state with no GPU resources and zeroed dimensions,
 // then loads the cascade parameters from the configuration.
 ShadowMap::ShadowMap()
 {
 	m = new ShadowMapInternals;
 	m->Framebuffer.reset();
 
 	m->Width = m->Height = 0;
 	m->EffectiveWidth = m->EffectiveHeight = 0;
 	m->QualityLevel = 0;
 
 	// DepthTextureBits: 24/32 are very much faster than 16, on GeForce 4 and FX;
 	// but they're very much slower on Radeon 9800.
 	// In both cases, the default (no specified depth) is fast, so we just use
 	// that by default and hope it's alright. (Otherwise, we'd probably need to
 	// do some kind of hardware detection to work out what to use.)
 	m->DepthTextureBits = 0;
 
 	// The light transform is read when bounds are added; give it a defined
 	// value in case SetupFrame has not been called yet.
 	m->LightTransform.SetIdentity();
 
 	m->UpdateCascadesParameters();
 }
 
 // Releases the framebuffer before the textures it was created from, then
 // frees the internal state.
 ShadowMap::~ShadowMap()
 {
 	m->Framebuffer.reset();
 	m->Texture.reset();
 	m->DummyTexture.reset();
 
 	delete m;
 }
 
 // Force the texture/buffer/etc to be recreated, particularly when the renderer's
 // size has changed. The GPU resources are released here and the cascade
 // parameters are re-read from the configuration; SetupFrame recreates the
 // texture when it finds it missing.
 void ShadowMap::RecreateTexture()
 {
 	m->Framebuffer.reset();
 	m->Texture.reset();
 	m->DummyTexture.reset();
 
 	m->UpdateCascadesParameters();
 
 	// (Texture will be constructed in next SetupFrame)
 }
 
 // SetupFrame: camera and light direction for this frame.
 // Creates the shadow texture if needed, builds the world -> light space
 // transform (and its inverse) from the light direction, resets the
 // accumulated receiver/caster bounds, and computes the distance, viewport
 // and light-space bounds of every cascade.
 void ShadowMap::SetupFrame(const CCamera& camera, const CVector3D& lightdir)
 {
 	if (!m->Texture)
 		m->CreateTexture();
 
 	// x starts as the world Y axis and is made perpendicular to the light
 	// direction below. eyepos is default-constructed and never assigned
 	// (presumably the zero vector - confirm against CVector3D).
 	CVector3D x(0, 1, 0), eyepos;
 
 	CVector3D z = lightdir;
 	z.Normalize();
 	x -= z * z.Dot(x);
 	if (x.Length() < 0.001)
 	{
 		// this is invoked if the initial x axis (world Y) and the light
 		// direction almost coincide; fall back to the world X axis
 		x = CVector3D(1.0, 0.0, 0.0);
 		x -= z * z.Dot(x);
 	}
 	x.Normalize();
 	CVector3D y = z.Cross(x);
 
 	// X axis perpendicular to light direction
 	m->LightTransform._11 = x.X;
 	m->LightTransform._12 = x.Y;
 	m->LightTransform._13 = x.Z;
 
 	// Y axis perpendicular to both the light direction and the X axis
 	m->LightTransform._21 = y.X;
 	m->LightTransform._22 = y.Y;
 	m->LightTransform._23 = y.Z;
 
 	// Z axis is in direction of light
 	m->LightTransform._31 = z.X;
 	m->LightTransform._32 = z.Y;
 	m->LightTransform._33 = z.Z;
 
 	// eye is at the origin of the coordinate system
 	m->LightTransform._14 = -x.Dot(eyepos);
 	m->LightTransform._24 = -y.Dot(eyepos);
 	m->LightTransform._34 = -z.Dot(eyepos);
 
 	m->LightTransform._41 = 0.0;
 	m->LightTransform._42 = 0.0;
 	m->LightTransform._43 = 0.0;
 	m->LightTransform._44 = 1.0;
 
 	m->LightTransform.GetInverse(m->InvLightTransform);
 	m->ShadowReceiverBound.SetEmpty();
 
 	// Copy of the view camera expressed in light space.
 	m->LightspaceCamera = camera;
 	m->LightspaceCamera.m_Orientation = m->LightTransform * camera.m_Orientation;
 	m->LightspaceCamera.UpdateFrustum();
 
 	// Defaults first, then the configured values; the ratio is clamped.
 	m->ShadowsCutoffDistance = DEFAULT_SHADOWS_CUTOFF_DISTANCE;
 	m->CascadeDistanceRatio = DEFAULT_CASCADE_DISTANCE_RATIO;
 	CFG_GET_VAL("shadowscutoffdistance", m->ShadowsCutoffDistance);
 	CFG_GET_VAL("shadowscascadedistanceratio", m->CascadeDistanceRatio);
 	m->CascadeDistanceRatio = Clamp(m->CascadeDistanceRatio, 1.1f, 16.0f);
 
 	// The last cascade ends at the cutoff distance; each earlier cascade
 	// ends at the next one's distance divided by the ratio.
 	m->Cascades[GetCascadeCount() - 1].Distance = m->ShadowsCutoffDistance;
 	for (int cascade = GetCascadeCount() - 2; cascade >= 0; --cascade)
 		m->Cascades[cascade].Distance = m->Cascades[cascade + 1].Distance / m->CascadeDistanceRatio;
 
 	if (GetCascadeCount() == 1 || m->ShadowsCoverMap)
 	{
 		// A single viewport covering the whole texture except a one texel border.
 		m->Cascades[0].ViewPort =
 			SViewPort{1, 1, m->EffectiveWidth - 2, m->EffectiveHeight - 2};
 		if (m->ShadowsCoverMap)
 			m->Cascades[0].Distance = camera.GetFarPlane();
 	}
 	else
 	{
 		// Each cascade gets one quadrant of the texture (bit 0 of the index
 		// selects the column, bit 1 the row), again with a one texel border.
 		for (int cascade = 0; cascade < GetCascadeCount(); ++cascade)
 		{
 			const int offsetX = (cascade & 0x1) ? m->EffectiveWidth / 2 : 0;
 			const int offsetY = (cascade & 0x2) ? m->EffectiveHeight / 2 : 0;
 			m->Cascades[cascade].ViewPort =
 				SViewPort{offsetX + 1, offsetY + 1,
 				m->EffectiveWidth / 2 - 2, m->EffectiveHeight / 2 - 2};
 		}
 	}
 
 	// Each cascade covers the range from the previous cascade's distance
 	// (or the camera near plane for the first one) to its own distance.
 	for (int cascadeIdx = 0; cascadeIdx < GetCascadeCount(); ++cascadeIdx)
 	{
 		ShadowMapInternals::Cascade& cascade = m->Cascades[cascadeIdx];
 
 		const float nearPlane = cascadeIdx > 0 ?
 			m->Cascades[cascadeIdx - 1].Distance : camera.GetNearPlane();
 		const float farPlane = cascade.Distance;
 
 		CalculateBoundsForCascade(camera, m->LightTransform,
 			nearPlane, farPlane, &cascade.ConvexBounds, &cascade.FrustumBBAA);
 		cascade.ShadowCasterBound.SetEmpty();
 	}
 }
 
 // AddShadowedBound: add a world-space bounding box to the bounds of shadowed
 // objects
 void ShadowMap::AddShadowCasterBound(const int cascade, const CBoundingBoxAligned& bounds)
 {
 	CBoundingBoxAligned lightspacebounds;
 
 	bounds.Transform(m->LightTransform, lightspacebounds);
 	m->Cascades[cascade].ShadowCasterBound += lightspacebounds;
 }
 
 void ShadowMap::AddShadowReceiverBound(const CBoundingBoxAligned& bounds)
 {
 	CBoundingBoxAligned lightspacebounds;
 
 	bounds.Transform(m->LightTransform, lightspacebounds);
 	m->ShadowReceiverBound += lightspacebounds;
 }
 
 // Returns the frustum used to find potential shadow casters for a cascade:
 // the receiver bounds clipped to the cascade's frustum box, stretched
 // towards the light and transformed with the inverse light transform.
 // When the clipped bounds are empty, a frustum around a single point is
 // returned instead (without applying the inverse light transform).
 CFrustum ShadowMap::GetShadowCasterCullFrustum(const int cascade)
 {
 	// Get the bounds of all objects that can receive shadows
 	CBoundingBoxAligned bound = m->ShadowReceiverBound;
 
 	// Intersect with the camera frustum, so the shadow map doesn't have to get
 	// stretched to cover the off-screen parts of large models
 	bound.IntersectFrustumConservative(m->Cascades[cascade].FrustumBBAA.ToFrustum());
 
 	// ShadowBound might have been empty to begin with, producing an empty result
 	if (bound.IsEmpty())
 	{
 		// CFrustum can't easily represent nothingness, so approximate it with
 		// a single point which won't match many objects
 		bound += CVector3D(0.0f, 0.0f, 0.0f);
 		return bound.ToFrustum();
 	}
 
 	// Extend the bounds a long way towards the light source, to encompass
 	// all objects that might cast visible shadows.
 	// (The exact constant was picked entirely arbitrarily.)
 	bound[0].Z -= 1000.f;
 
 	CFrustum frustum = bound.ToFrustum();
 	frustum.Transform(m->InvLightTransform);
 	return frustum;
 }
 
 // CalculateShadowMatrices: calculate required matrices for shadow map generation - the light's
 // projection and transformation matrices.
 // Writes ShadowRenderBound, LightProjection and TextureMatrix of the given
 // cascade; reads the bounds accumulated since SetupFrame.
 void ShadowMapInternals::CalculateShadowMatrices(const int cascade)
 {
 	// Start from the rotation-stable bounds computed in SetupFrame.
 	CBoundingBoxAligned& shadowRenderBound = Cascades[cascade].ShadowRenderBound;
 	shadowRenderBound = Cascades[cascade].ConvexBounds;
 
 	if (ShadowsCoverMap)
 	{
 		// Start building the shadow map to cover all objects that will receive shadows
 		CBoundingBoxAligned receiverBound = ShadowReceiverBound;
 
 		// Intersect with the camera frustum, so the shadow map doesn't have to get
 		// stretched to cover the off-screen parts of large models
 		receiverBound.IntersectFrustumConservative(LightspaceCamera.GetFrustum());
 
 		// Intersect with the shadow caster bounds, because there's no point
 		// wasting space around the edges of the shadow map that we're not going
 		// to draw into
 		shadowRenderBound[0].X = std::max(receiverBound[0].X, Cascades[cascade].ShadowCasterBound[0].X);
 		shadowRenderBound[0].Y = std::max(receiverBound[0].Y, Cascades[cascade].ShadowCasterBound[0].Y);
 		shadowRenderBound[1].X = std::min(receiverBound[1].X, Cascades[cascade].ShadowCasterBound[1].X);
 		shadowRenderBound[1].Y = std::min(receiverBound[1].Y, Cascades[cascade].ShadowCasterBound[1].Y);
 	}
 	else if (CascadeCount > 1)
 	{
 		// We need to offset the cascade to its place on the texture.
 		// The bounds are doubled along each axis, growing away from the
 		// quadrant selected by the cascade index (bit 0: X, bit 1: Y).
 		const CVector3D size = (shadowRenderBound[1] - shadowRenderBound[0]) * 0.5f;
 		if (!(cascade & 0x1))
 			shadowRenderBound[1].X += size.X * 2.0f;
 		else
 			shadowRenderBound[0].X -= size.X * 2.0f;
 		if (!(cascade & 0x2))
 			shadowRenderBound[1].Y += size.Y * 2.0f;
 		else
 			shadowRenderBound[0].Y -= size.Y * 2.0f;
 	}
 
 	// Set the near and far planes to include just the shadow casters,
 	// so we make full use of the depth texture's range. Add a bit of a
 	// delta so we don't accidentally clip objects that are directly on
 	// the planes.
 	shadowRenderBound[0].Z = Cascades[cascade].ShadowCasterBound[0].Z - 2.f;
 	shadowRenderBound[1].Z = Cascades[cascade].ShadowCasterBound[1].Z + 2.f;
 
 	// Setup orthogonal projection (lightspace -> clip space) for shadowmap rendering
 	CVector3D scale = shadowRenderBound[1] - shadowRenderBound[0];
 	CVector3D shift = (shadowRenderBound[1] + shadowRenderBound[0]) * -0.5;
 
 	// Keep every extent at least 1 so the divisions below stay well-defined.
 	if (scale.X < 1.0)
 		scale.X = 1.0;
 	if (scale.Y < 1.0)
 		scale.Y = 1.0;
 	if (scale.Z < 1.0)
 		scale.Z = 1.0;
 
 	// Map the extent of the bounds onto a range of length 2.
 	scale.X = 2.0 / scale.X;
 	scale.Y = 2.0 / scale.Y;
 	scale.Z = 2.0 / scale.Z;
 
 	// make sure a given world position falls on a consistent shadowmap texel fractional offset
 	float offsetX = fmod(shadowRenderBound[0].X - LightTransform._14, 2.0f/(scale.X*EffectiveWidth));
 	float offsetY = fmod(shadowRenderBound[0].Y - LightTransform._24, 2.0f/(scale.Y*EffectiveHeight));
 
 	CMatrix3D& lightProjection = Cascades[cascade].LightProjection;
 	lightProjection.SetZero();
 	lightProjection._11 = scale.X;
 	lightProjection._14 = (shift.X + offsetX) * scale.X;
 	lightProjection._22 = scale.Y;
 	lightProjection._24 = (shift.Y + offsetY) * scale.Y;
 	lightProjection._33 = scale.Z;
 	lightProjection._34 = shift.Z * scale.Z;
 	lightProjection._44 = 1.0;
 
 	// Calculate texture matrix by creating the clip space to texture coordinate matrix
 	// and then concatenating all matrices that have been calculated so far
 
 	float texscalex = scale.X * 0.5f * (float)EffectiveWidth / (float)Width;
 	float texscaley = scale.Y * 0.5f * (float)EffectiveHeight / (float)Height;
 	float texscalez = scale.Z * 0.5f;
 
 	CMatrix3D lightToTex;
 	lightToTex.SetZero();
 	lightToTex._11 = texscalex;
 	lightToTex._14 = (offsetX - shadowRenderBound[0].X) * texscalex;
 	lightToTex._22 = texscaley;
 	lightToTex._24 = (offsetY - shadowRenderBound[0].Y) * texscaley;
 	lightToTex._33 = texscalez;
 	lightToTex._34 = -shadowRenderBound[0].Z * texscalez;
 	lightToTex._44 = 1.0;
 
 	// World space -> light space -> texture space.
 	Cascades[cascade].TextureMatrix = lightToTex * LightTransform;
 }
 
 // Create the shadow map: releases any previous resources, picks the
 // texture size from the "shadowquality" config value, then creates the
 // depth texture (plus an optional dummy colour texture) and the framebuffer.
 // If the framebuffer cannot be created, shadows are disabled in the
 // rendering options.
 void ShadowMapInternals::CreateTexture()
 {
 	// Cleanup
 	Framebuffer.reset();
 	Texture.reset();
 	DummyTexture.reset();
 
 	Renderer::Backend::GL::CDevice* backendDevice = g_VideoMode.GetBackendDevice();
 
 	CFG_GET_VAL("shadowquality", QualityLevel);
 
 	// Pick the shadow map size from the quality level. Only the highest
 	// level depends on the view size (next power of two up from the larger
 	// of width/height, times four, and at least 4096).
 	int shadowMapSize;
 	switch (QualityLevel)
 	{
 	// Low
 	case -1:
 		shadowMapSize = 512;
 		break;
 	// High
 	case 1:
 		shadowMapSize = 2048;
 		break;
 	// Ultra
 	case 2:
 		shadowMapSize = std::max(round_up_to_pow2(std::max(g_Renderer.GetWidth(), g_Renderer.GetHeight())) * 4, 4096);
 		break;
 	// Medium as is
 	default:
 		shadowMapSize = 1024;
 		break;
 	}
 
 	// Clamp to the maximum texture size.
 	shadowMapSize = std::min(
 		shadowMapSize, static_cast<int>(backendDevice->GetCapabilities().maxTextureSize));
 
 	Width = Height = shadowMapSize;
 
 	// Since we're using a framebuffer object, the whole texture is available
 	EffectiveWidth = Width;
 	EffectiveHeight = Height;
 
 	// formatName is only used for the log message below.
 	const char* formatName;
 	Renderer::Backend::Format backendFormat = Renderer::Backend::Format::UNDEFINED;
 #if CONFIG2_GLES
 	formatName = "DEPTH_COMPONENT";
 	backendFormat = Renderer::Backend::Format::D24;
 #else
 	// Any bit depth other than 16, 24 or 32 (including the default 0)
 	// selects the 24 bit format.
 	switch (DepthTextureBits)
 	{
 	case 16: formatName = "Format::D16"; backendFormat = Renderer::Backend::Format::D16; break;
 	case 24: formatName = "Format::D24"; backendFormat = Renderer::Backend::Format::D24; break;
 	case 32: formatName = "Format::D32"; backendFormat = Renderer::Backend::Format::D32;  break;
 	default: formatName = "Format::D24"; backendFormat = Renderer::Backend::Format::D24; break;
 	}
 #endif
 	ENSURE(formatName);
 
 	LOGMESSAGE("Creating shadow texture (size %dx%d) (format = %s)",
 		Width, Height, formatName);
 
 	// Optional colour attachment, see the comment on DummyTexture.
 	if (g_RenderingOptions.GetShadowAlphaFix())
 	{
 		DummyTexture = backendDevice->CreateTexture2D("ShadowMapDummy",
 			Renderer::Backend::Format::R8G8B8A8, Width, Height,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::NEAREST,
 				Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
 	}
 
 	Renderer::Backend::Sampler::Desc samplerDesc =
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 #if CONFIG2_GLES
 			// GLES doesn't do depth comparisons, so treat it as a
 			// basic unfiltered depth texture
 			Renderer::Backend::Sampler::Filter::NEAREST,
 #else
 			// Use LINEAR to trigger automatic PCF on some devices.
 			Renderer::Backend::Sampler::Filter::LINEAR,
 #endif
 			Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE);
 	// Enable automatic depth comparisons
 	samplerDesc.compareEnabled = true;
 	samplerDesc.compareOp = Renderer::Backend::CompareOp::LESS_OR_EQUAL;
 
 	Texture = backendDevice->CreateTexture2D("ShadowMapDepth",
 		backendFormat, Width, Height, samplerDesc);
 
 	// Colour attachment is the dummy texture when the alpha fix is on,
 	// otherwise there is none; the depth attachment is the shadow texture.
 	Framebuffer = backendDevice->CreateFramebuffer("ShadowMapFramebuffer",
 		g_RenderingOptions.GetShadowAlphaFix() ? DummyTexture.get() : nullptr, Texture.get());
 
 	if (!Framebuffer)
 	{
 		LOGERROR("Failed to create shadows framebuffer");
 
 		// Disable shadow rendering (but let the user try again if they want).
 		g_RenderingOptions.SetShadows(false);
 	}
 }
 
 // Set up to render into shadow map texture: binds and clears the shadow
 // framebuffer and saves the current view camera so EndRender can restore it.
 // The framebuffer must exist.
 void ShadowMap::BeginRender()
 {
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext =
 		g_Renderer.GetDeviceCommandContext();
 
 	{
 		PROFILE("bind framebuffer");
 		ENSURE(m->Framebuffer);
 		deviceCommandContext->SetFramebuffer(m->Framebuffer.get());
 	}
 
 	// clear buffers
 	{
 		PROFILE("clear depth texture");
 		// In case we used m_ShadowAlphaFix, we ought to clear the unused
 		// color buffer too, else Mali 400 drivers get confused.
 		// Might as well clear stencil too for completeness.
 		deviceCommandContext->ClearFramebuffer();
 	}
 
 	m->SavedViewCamera = g_Renderer.GetSceneRenderer().GetViewCamera();
 }
 
 // Prepares rendering of one cascade: computes its matrices, sets the
 // viewport to the whole used shadow texture, replaces the view camera with
 // the light camera of the cascade and restricts drawing to the cascade's
 // viewport with a scissor rectangle.
 void ShadowMap::PrepareCamera(const int cascade)
 {
 	m->CalculateShadowMatrices(cascade);
 
 	const SViewPort vp = { 0, 0, m->EffectiveWidth, m->EffectiveHeight };
 	g_Renderer.SetViewport(vp);
 
 	// Start from the saved view camera and override projection and
 	// orientation with the light's.
 	CCamera camera = m->SavedViewCamera;
 	camera.SetProjection(m->Cascades[cascade].LightProjection);
 	camera.GetOrientation() = m->InvLightTransform;
 	g_Renderer.GetSceneRenderer().SetViewCamera(camera);
 
 	const SViewPort& cascadeViewPort = m->Cascades[cascade].ViewPort;
 	Renderer::Backend::GL::CDeviceCommandContext::Rect scissorRect;
 	scissorRect.x = cascadeViewPort.m_X;
 	scissorRect.y = cascadeViewPort.m_Y;
 	scissorRect.width = cascadeViewPort.m_Width;
 	scissorRect.height = cascadeViewPort.m_Height;
 	g_Renderer.GetDeviceCommandContext()->SetScissors(1, &scissorRect);
 }
 
 // Finish rendering into shadow map texture
 void ShadowMap::EndRender()
 {
 	g_Renderer.GetDeviceCommandContext()->SetScissors(0, nullptr);
 
 	g_Renderer.GetSceneRenderer().SetViewCamera(m->SavedViewCamera);
 
 	{
 		PROFILE("unbind framebuffer");
 		g_Renderer.GetDeviceCommandContext()->SetFramebuffer(
 			g_VideoMode.GetBackendDevice()->GetCurrentBackbuffer());
 	}
 
 	const SViewPort vp = { 0, 0, g_Renderer.GetWidth(), g_Renderer.GetHeight() };
 	g_Renderer.SetViewport(vp);
 }
 
 void ShadowMap::BindTo(const CShaderProgramPtr& shader) const
 {
 	if (!shader->GetTextureBinding(str_shadowTex).Active() || !m->Texture)
 		return;
 
 	shader->BindTexture(str_shadowTex, m->Texture.get());
 	shader->Uniform(str_shadowScale, m->Width, m->Height, 1.0f / m->Width, 1.0f / m->Height);
 	const CVector3D cameraForward = g_Renderer.GetSceneRenderer().GetCullCamera().GetOrientation().GetIn();
 	shader->Uniform(str_cameraForward, cameraForward.X, cameraForward.Y, cameraForward.Z,
 		cameraForward.Dot(g_Renderer.GetSceneRenderer().GetCullCamera().GetOrientation().GetTranslation()));
 	if (GetCascadeCount() == 1)
 	{
 		shader->Uniform(str_shadowTransform, m->Cascades[0].TextureMatrix);
 		shader->Uniform(str_shadowDistance, m->Cascades[0].Distance);
 	}
 	else
 	{
 		std::vector<float> shadowDistances;
 		std::vector<CMatrix3D> shadowTransforms;
 		for (const ShadowMapInternals::Cascade& cascade : m->Cascades)
 		{
 			shadowDistances.emplace_back(cascade.Distance);
 			shadowTransforms.emplace_back(cascade.TextureMatrix);
 		}
 		shader->Uniform(str_shadowTransforms_0, GetCascadeCount(), shadowTransforms.data());
 		shader->Uniform(str_shadowTransforms, GetCascadeCount(), shadowTransforms.data());
 		shader->Uniform(str_shadowDistances_0, GetCascadeCount(), shadowDistances.data());
 		shader->Uniform(str_shadowDistances, GetCascadeCount(), shadowDistances.data());
 	}
 }
 
 // Depth texture bits
 // Returns the currently configured DepthTextureBits value.
 int ShadowMap::GetDepthTextureBits() const
 {
 	return m->DepthTextureBits;
 }
 
 // Change the depth texture bit count. When the value actually changes, the
 // existing texture is released and the stored size is zeroed.
 void ShadowMap::SetDepthTextureBits(int bits)
 {
 	if (bits == m->DepthTextureBits)
 		return;
 
 	m->Texture.reset();
 	m->Height = 0;
 	m->Width = 0;
 
 	m->DepthTextureBits = bits;
 }
 
 void ShadowMap::RenderDebugBounds()
 {
 	// Render various shadow bounds:
 	//  Yellow = bounds of objects in view frustum that receive shadows
 	//  Red = culling frustum used to find potential shadow casters
 	//  Blue = frustum used for rendering the shadow map
 
 	const CMatrix3D transform = g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection() * m->InvLightTransform;
 
 	g_Renderer.GetDebugRenderer().DrawBoundingBox(
 		m->ShadowReceiverBound, CColor(1.0f, 1.0f, 0.0f, 1.0f), transform, true);
 
 	for (int cascade = 0; cascade < GetCascadeCount(); ++cascade)
 	{
 		g_Renderer.GetDebugRenderer().DrawBoundingBox(
 			m->Cascades[cascade].ShadowRenderBound, CColor(0.0f, 0.0f, 1.0f, 0.10f), transform);
 		g_Renderer.GetDebugRenderer().DrawBoundingBox(
 			m->Cascades[cascade].ShadowRenderBound, CColor(0.0f, 0.0f, 1.0f, 0.5f), transform, true);
 
 		const CFrustum frustum = GetShadowCasterCullFrustum(cascade);
 		// We don't have a function to create a brush directly from a frustum, so use
 		// the ugly approach of creating a large cube and then intersecting with the frustum
 		const CBoundingBoxAligned dummy(CVector3D(-1e4, -1e4, -1e4), CVector3D(1e4, 1e4, 1e4));
 		CBrush brush(dummy);
 		CBrush frustumBrush;
 		brush.Intersect(frustum, frustumBrush);
 
 		g_Renderer.GetDebugRenderer().DrawBrush(frustumBrush, CColor(1.0f, 0.0f, 0.0f, 0.1f));
 		g_Renderer.GetDebugRenderer().DrawBrush(frustumBrush, CColor(1.0f, 0.0f, 0.0f, 0.1f), true);
 	}
 
 	ogl_WarnIfError();
 }
 
 // Debug helper: draw the shadow map texture as a 256x256 quad using the
 // canvas2d effect. Does nothing if the shadow texture does not exist.
 void ShadowMap::RenderDebugTexture(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (!m->Texture)
 		return;
 
 #if !CONFIG2_GLES
 	// Turn the texture compare mode off while the texture is being displayed;
 	// it is set back to GL_COMPARE_R_TO_TEXTURE at the end of this function.
 	deviceCommandContext->BindTexture(0, GL_TEXTURE_2D, m->Texture->GetHandle());
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
 #endif
 
 	CShaderTechniquePtr texTech = g_Renderer.GetShaderManager().LoadEffect(str_canvas2d);
 	texTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		texTech->GetGraphicsPipelineStateDesc());
 
 	const CShaderProgramPtr& texShader = texTech->GetShader();
 
 	texShader->Uniform(str_transform, GetDefaultGuiMatrix());
 	texShader->BindTexture(str_tex, m->Texture.get());
 	texShader->Uniform(str_colorAdd, CColor(0.0f, 0.0f, 0.0f, 1.0f));
 	texShader->Uniform(str_colorMul, CColor(1.0f, 1.0f, 1.0f, 0.0f));
 	texShader->Uniform(str_grayscaleFactor, 0.0f);
 
 	// Two triangles covering the square (0,0)-(s,s), with matching 0..1 UVs.
 	float s = 256.f;
 	float boxVerts[] =
 	{
  		0,0, 0,s, s,0,
 		s,0, 0,s, s,s
 	};
 	float boxUV[] =
 	{
 		0,0, 0,1, 1,0,
 		1,0, 0,1, 1,1
 	};
 
 	texShader->VertexPointer(2, GL_FLOAT, 0, boxVerts);
 	texShader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, 0, boxUV);
 	texShader->AssertPointersBound();
-	glDrawArrays(GL_TRIANGLES, 0, 6);
+	deviceCommandContext->Draw(0, 6);
 
 	texTech->EndPass();
 
 #if !CONFIG2_GLES
 	// Restore the compare mode changed at the top of the function.
 	deviceCommandContext->BindTexture(0, GL_TEXTURE_2D, m->Texture->GetHandle());
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_R_TO_TEXTURE);
 #endif
 
 	ogl_WarnIfError();
 }
 
 // Number of shadow cascades in use: always 1 on GLES builds; otherwise 1 when
 // ShadowsCoverMap is set, else the configured CascadeCount.
 int ShadowMap::GetCascadeCount() const
 {
 #if CONFIG2_GLES
 	return 1;
 #else
 	return m->ShadowsCoverMap ? 1 : m->CascadeCount;
 #endif
 }
Index: ps/trunk/source/renderer/SilhouetteRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/SilhouetteRenderer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/SilhouetteRenderer.cpp	(revision 26525)
@@ -1,508 +1,508 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "SilhouetteRenderer.h"
 
 #include "graphics/Camera.h"
 #include "graphics/HFTracer.h"
 #include "graphics/Model.h"
 #include "graphics/Patch.h"
 #include "graphics/ShaderManager.h"
 #include "maths/MathUtil.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Profile.h"
 #include "renderer/DebugRenderer.h"
 #include "renderer/Renderer.h"
 #include "renderer/Scene.h"
 
 #include <cfloat>
 
 // Defined elsewhere; g_yres is used below when building the debug overlay matrix.
 extern int g_xres, g_yres;
 
 // For debugging
 // When true, ComputeSubmissions skips the precise ray intersection tests and
 // accepts every caster/occluder pair that passes the coarse screen-space checks.
 static const bool g_DisablePreciseIntersections = false;
 
 // Debug output starts disabled.
 SilhouetteRenderer::SilhouetteRenderer()
 	: m_DebugEnabled(false)
 {
 }
 
 // Queue a terrain patch as a potential silhouette occluder.
 void SilhouetteRenderer::AddOccluder(CPatch* patch)
 {
 	m_SubmittedPatchOccluders.emplace_back(patch);
 }
 
 // Queue a model as a potential silhouette occluder.
 void SilhouetteRenderer::AddOccluder(CModel* model)
 {
 	m_SubmittedModelOccluders.emplace_back(model);
 }
 
 // Queue a model as a potential silhouette caster.
 void SilhouetteRenderer::AddCaster(CModel* model)
 {
 	m_SubmittedModelCasters.emplace_back(model);
 }
 
 /*
  * Silhouettes are the solid-colored versions of units that are rendered when
  * standing behind a building or terrain, so the player won't lose them.
  *
  * The rendering is done in CRenderer::RenderSilhouettes, by rendering the
  * units (silhouette casters) and buildings/terrain (silhouette occluders)
  * in an extra pass using depth and stencil buffers. It's very inefficient to
  * render those objects when they're not actually going to contribute to a
  * silhouette.
  *
  * This class is responsible for finding the subset of casters/occluders
  * that might contribute to a silhouette and will need to be rendered.
  *
  * The algorithm is largely based on sweep-and-prune for detecting intersection
  * along a single axis:
  *
  * First we compute the 2D screen-space bounding box of every occluder, and
  * their minimum distance from the camera. We also compute the screen-space
  * position of each caster (approximating them as points, which is not perfect
  * but almost always good enough).
  *
  * We split each occluder's screen-space bounds into a left ('in') edge and
  * right ('out') edge. We put those edges plus the caster points into a list,
  * and sort by x coordinate.
  *
  * Then we walk through the list, maintaining an active set of occluders.
  * An 'in' edge will add an occluder to the set, an 'out' edge will remove it.
  * When we reach a caster point, the active set contains all the occluders that
  * intersect it in x. We do a quick test of y and depth coordinates against
  * each occluder in the set. If they pass that test, we do a more precise ray
  * vs bounding box test (for model occluders) or ray vs patch (for terrain
  * occluders) to see if we really need to render that caster and occluder.
  *
  * Performance relies on the active set being quite small. Given the game's
  * typical occluder sizes and camera angles, this works out okay.
  *
  * We have to do precise ray/patch intersection tests for terrain, because
  * if we just used the patch's bounding box, pretty much every unit would
  * be seen as intersecting the patch it's standing on.
  *
  * We store screen-space coordinates as 14-bit integers (0..16383) because
  * that lets us pack and sort the edge/point list efficiently.
  */
 
 // Exclusive upper bound of the 14-bit screen-space coordinate range (16384).
 static const u16 g_MaxCoord = 1 << 14;
 // Half of the coordinate range; added to projected coordinates to shift them
 // into the non-negative range.
 static const u16 g_HalfMaxCoord = g_MaxCoord / 2;
 
 // Per-occluder working data used while computing silhouette submissions.
 struct Occluder
 {
 	// The submitted patch or model.
 	CRenderableObject* renderable;
 	// True if renderable is a CPatch, false if it is a CModel.
 	bool isPatch;
 	// Screen-space bounding rectangle in 14-bit coordinates.
 	u16 x0;
 	u16 y0;
 	u16 x1;
 	u16 y1;
 	// Minimum projected depth over the corners of the world bounds.
 	float z;
 	// Set once the occluder is found to hide at least one caster.
 	bool rendered;
 };
 
 // Per-caster working data used while computing silhouette submissions.
 struct Caster
 {
 	// The submitted model.
 	CModel* model;
 	// Screen-space position in 14-bit coordinates.
 	u16 x;
 	u16 y;
 	// Projected depth of the caster position.
 	float z;
 	// Set once the caster is found to be hidden by at least one occluder.
 	bool rendered;
 };
 
 // Kinds of entry in the sweep list. The value is stored in a 2-bit field of
 // Entry, so it must stay within 0..3.
 enum
 {
 	EDGE_IN,	// left edge of an occluder's screen bounds
 	EDGE_OUT,	// right edge of an occluder's screen bounds
 	POINT		// screen position of a caster
 };
 
 // Entry is essentially:
 //   struct Entry {
 //     u16 id; // index into occluders array
 //     u16 type : 2;
 //     u16 x : 14;
 //  };
 // where x is in the most significant bits, so that sorting as a uint32_t
 // is the same as sorting by x. To avoid worrying about endianness and the
 // compiler's ability to handle bitfields efficiently, we use uint32_t instead
 // of the actual struct.
 
 // Packed sweep-list entry: bits 18..31 hold x, bits 16..17 hold the type and
 // bits 0..15 hold the id, so sorting entries as integers sorts them by x.
 typedef uint32_t Entry;
 
 // Pack (type, id, x) into an Entry. x must be below 1 << 14 and type within 0..3.
 // The fields are widened to Entry before shifting: shifting the promoted
 // (signed) int would reach the sign bit for x >= 8192.
 static Entry EntryCreate(int type, u16 id, u16 x)
 {
 	return (static_cast<Entry>(x) << 18) | (static_cast<Entry>(type) << 16) | static_cast<Entry>(id);
 }
 // Extract the 16-bit id (index into the occluders or casters array).
 static int EntryGetId(Entry e) { return e & 0xffff; }
 // Extract the 2-bit type (EDGE_IN, EDGE_OUT or POINT).
 static int EntryGetType(Entry e) { return (e >> 16) & 3; }
 
 struct ActiveList
 {
 	std::vector<u16> m_Ids;
 
 	void Add(u16 id)
 	{
 		m_Ids.push_back(id);
 	}
 
 	void Remove(u16 id)
 	{
 		ssize_t sz = m_Ids.size();
 		for (ssize_t i = sz-1; i >= 0; --i)
 		{
 			if (m_Ids[i] == id)
 			{
 				m_Ids[i] = m_Ids[sz-1];
 				m_Ids.pop_back();
 				return;
 			}
 		}
 		debug_warn(L"Failed to find id");
 	}
 };
 
 // Compute the screen-space bounding rectangle and minimum depth of a world
 // bounding box, storing them in occluder.x0/y0/x1/y1 and occluder.z.
 //
 // Each of the 8 corners of bounds is transformed by proj and mapped into the
 // 14-bit coordinate range. The min/max are accumulated as int and clamped to
 // [0, g_MaxCoord - 1] before being narrowed to u16; narrowing first would
 // make off-screen coordinates wrap around instead of saturating (and casting
 // a negative float directly to an unsigned type is undefined behaviour).
 static void ComputeScreenBounds(Occluder& occluder, const CBoundingBoxAligned& bounds, CMatrix3D& proj)
 {
 	int x0 = std::numeric_limits<int>::max();
 	int y0 = std::numeric_limits<int>::max();
 	int x1 = std::numeric_limits<int>::min();
 	int y1 = std::numeric_limits<int>::min();
 	float z0 = std::numeric_limits<float>::max();
 	for (size_t ix = 0; ix <= 1; ++ix)
 	{
 		for (size_t iy = 0; iy <= 1; ++iy)
 		{
 			for (size_t iz = 0; iz <= 1; ++iz)
 			{
 				const CVector4D svec = proj.Transform(CVector4D(bounds[ix].X, bounds[iy].Y, bounds[iz].Z, 1.0f));
 				const int sx = g_HalfMaxCoord + static_cast<int>(g_HalfMaxCoord * svec.X / svec.W);
 				const int sy = g_HalfMaxCoord + static_cast<int>(g_HalfMaxCoord * svec.Y / svec.W);
 				x0 = std::min(x0, sx);
 				y0 = std::min(y0, sy);
 				x1 = std::max(x1, sx);
 				y1 = std::max(y1, sy);
 				z0 = std::min(z0, svec.Z / svec.W);
 			}
 		}
 	}
 	// TODO: there must be a quicker way to do this than to test every vertex,
 	// given the symmetry of the bounding box
 
 	// Clamp while still in int, then narrow: the result always fits in 14 bits.
 	occluder.x0 = static_cast<u16>(Clamp(x0, 0, g_MaxCoord - 1));
 	occluder.y0 = static_cast<u16>(Clamp(y0, 0, g_MaxCoord - 1));
 	occluder.x1 = static_cast<u16>(Clamp(x1, 0, g_MaxCoord - 1));
 	occluder.y1 = static_cast<u16>(Clamp(y1, 0, g_MaxCoord - 1));
 	occluder.z = z0;
 }
 
 // Compute the screen-space position and depth of a world-space point, storing
 // them in caster.x/y and caster.z.
 //
 // The coordinates are computed and clamped to [0, g_MaxCoord - 1] as int
 // before being narrowed to u16; narrowing first would make off-screen
 // positions wrap around (e.g. -5 becoming 65531 and then clamping to the
 // maximum instead of to 0).
 static void ComputeScreenPos(Caster& caster, const CVector3D& pos, CMatrix3D& proj)
 {
 	const CVector4D svec = proj.Transform(CVector4D(pos.X, pos.Y, pos.Z, 1.0f));
 	const int x = g_HalfMaxCoord + static_cast<int>(g_HalfMaxCoord * svec.X / svec.W);
 	const int y = g_HalfMaxCoord + static_cast<int>(g_HalfMaxCoord * svec.Y / svec.W);
 	caster.x = static_cast<u16>(Clamp(x, 0, g_MaxCoord - 1));
 	caster.y = static_cast<u16>(Clamp(y, 0, g_MaxCoord - 1));
 	caster.z = svec.Z / svec.W;
 }
 
 // Determine which of the submitted occluders and casters need to be rendered
 // for silhouettes as seen from the given camera. The results are written to
 // m_VisiblePatchOccluders, m_VisibleModelOccluders and m_VisibleModelCasters;
 // when m_DebugEnabled is set, m_DebugRects and m_DebugBounds are filled too.
 void SilhouetteRenderer::ComputeSubmissions(const CCamera& camera)
 {
 	PROFILE3("compute silhouettes");
 
 	m_DebugBounds.clear();
 	m_DebugRects.clear();
 	m_DebugSpheres.clear();
 
 	m_VisiblePatchOccluders.clear();
 	m_VisibleModelOccluders.clear();
 	m_VisibleModelCasters.clear();
 
 	std::vector<Occluder> occluders;
 	std::vector<Caster> casters;
 	std::vector<Entry> entries;
 
 	// Each occluder contributes two entries (in/out edge), each caster one.
 	occluders.reserve(m_SubmittedModelOccluders.size() + m_SubmittedPatchOccluders.size());
 	casters.reserve(m_SubmittedModelCasters.size());
 	entries.reserve((m_SubmittedModelOccluders.size() + m_SubmittedPatchOccluders.size()) * 2 + m_SubmittedModelCasters.size());
 
 	CMatrix3D proj = camera.GetViewProjection();
 
 	// Bump the positions of unit casters upwards a bit, so they're not always
 	// detected as intersecting the terrain they're standing on
 	CVector3D posOffset(0.0f, 0.1f, 0.0f);
 
 #if 0
 	// For debugging ray-patch intersections - casts a ton of rays and draws
 	// a sphere where they intersect
 	for (int y = 0; y < g_yres; y += 8)
 	{
 		for (int x = 0; x < g_xres; x += 8)
 		{
 			SOverlaySphere sphere;
 			sphere.m_Color = CColor(1, 0, 0, 1);
 			sphere.m_Radius = 0.25f;
 			sphere.m_Center = camera.GetWorldCoordinates(x, y, false);
 
 			CVector3D origin, dir;
 			camera.BuildCameraRay(x, y, origin, dir);
 
 			for (size_t i = 0; i < m_SubmittedPatchOccluders.size(); ++i)
 			{
 				CPatch* occluder = m_SubmittedPatchOccluders[i];
 				if (CHFTracer::PatchRayIntersect(occluder, origin, dir, &sphere.m_Center))
 					sphere.m_Color = CColor(0, 0, 1, 1);
 			}
 			m_DebugSpheres.push_back(sphere);
 		}
 	}
 #endif
 
 	{
 		PROFILE("compute bounds");
 
 		// Model occluders: one EDGE_IN and one EDGE_OUT entry each.
 		for (size_t i = 0; i < m_SubmittedModelOccluders.size(); ++i)
 		{
 			CModel* occluder = m_SubmittedModelOccluders[i];
 
 			Occluder d;
 			d.renderable = occluder;
 			d.isPatch = false;
 			d.rendered = false;
 			ComputeScreenBounds(d, occluder->GetWorldBounds(), proj);
 
 			// Skip zero-sized occluders, so we don't need to worry about EDGE_OUT
 			// getting sorted before EDGE_IN
 			if (d.x0 == d.x1 || d.y0 == d.y1)
 				continue;
 
 			u16 id = static_cast<u16>(occluders.size());
 			occluders.push_back(d);
 
 			entries.push_back(EntryCreate(EDGE_IN, id, d.x0));
 			entries.push_back(EntryCreate(EDGE_OUT, id, d.x1));
 		}
 
 		// Terrain patch occluders: same treatment, sharing the occluders array.
 		for (size_t i = 0; i < m_SubmittedPatchOccluders.size(); ++i)
 		{
 			CPatch* occluder = m_SubmittedPatchOccluders[i];
 
 			Occluder d;
 			d.renderable = occluder;
 			d.isPatch = true;
 			d.rendered = false;
 			ComputeScreenBounds(d, occluder->GetWorldBounds(), proj);
 
 			// Skip zero-sized occluders
 			if (d.x0 == d.x1 || d.y0 == d.y1)
 				continue;
 
 			u16 id = static_cast<u16>(occluders.size());
 			occluders.push_back(d);
 
 			entries.push_back(EntryCreate(EDGE_IN, id, d.x0));
 			entries.push_back(EntryCreate(EDGE_OUT, id, d.x1));
 		}
 
 		// Casters: a single POINT entry each, indexed into the casters array.
 		for (size_t i = 0; i < m_SubmittedModelCasters.size(); ++i)
 		{
 			CModel* model = m_SubmittedModelCasters[i];
 			CVector3D pos = model->GetTransform().GetTranslation() + posOffset;
 
 			Caster d;
 			d.model = model;
 			d.rendered = false;
 			ComputeScreenPos(d, pos, proj);
 
 			u16 id = static_cast<u16>(casters.size());
 			casters.push_back(d);
 
 			entries.push_back(EntryCreate(POINT, id, d.x));
 		}
 	}
 
 	// Make sure the u16 id didn't overflow
 	ENSURE(occluders.size() < 65536 && casters.size() < 65536);
 
 	{
 		PROFILE("sorting");
 		// x occupies the most significant bits of Entry, so this sorts by x.
 		std::sort(entries.begin(), entries.end());
 	}
 
 	{
 		PROFILE("sweeping");
 
 		ActiveList active;
 		CVector3D cameraPos = camera.GetOrientation().GetTranslation();
 
 		for (size_t i = 0; i < entries.size(); ++i)
 		{
 			Entry e = entries[i];
 			int type = EntryGetType(e);
 			u16 id = EntryGetId(e);
 			if (type == EDGE_IN)
 				active.Add(id);
 			else if (type == EDGE_OUT)
 				active.Remove(id);
 			else
 			{
 				// POINT entry: test the caster against every currently active occluder.
 				Caster& caster = casters[id];
 				for (size_t j = 0; j < active.m_Ids.size(); ++j)
 				{
 					Occluder& occluder = occluders[active.m_Ids[j]];
 
 					// Coarse rejection on the y range
 					if (caster.y < occluder.y0 || caster.y > occluder.y1)
 						continue;
 
 					// Coarse rejection on depth: caster has a smaller depth than the occluder
 					if (caster.z < occluder.z)
 						continue;
 
 					// No point checking further if both are already being rendered
 					if (caster.rendered && occluder.rendered)
 						continue;
 
 					if (!g_DisablePreciseIntersections)
 					{
 						// Cast a ray from the (offset) caster position towards the camera
 						CVector3D pos = caster.model->GetTransform().GetTranslation() + posOffset;
 						if (occluder.isPatch)
 						{
 							CPatch* patch = static_cast<CPatch*>(occluder.renderable);
 							if (!CHFTracer::PatchRayIntersect(patch, pos, cameraPos - pos, NULL))
 								continue;
 						}
 						else
 						{
 							float tmin, tmax;
 							if (!occluder.renderable->GetWorldBounds().RayIntersect(pos, cameraPos - pos, tmin, tmax))
 								continue;
 						}
 					}
 
 					caster.rendered = true;
 					occluder.rendered = true;
 				}
 			}
 		}
 	}
 
 	if (m_DebugEnabled)
 	{
 		// Record a rectangle and a bounding box per occluder, coloured by
 		// whether the occluder was marked as rendered.
 		for (size_t i = 0; i < occluders.size(); ++i)
 		{
 			DebugRect r;
 			r.color = occluders[i].rendered ? CColor(1.0f, 1.0f, 0.0f, 1.0f) : CColor(0.2f, 0.2f, 0.0f, 1.0f);
 			r.x0 = occluders[i].x0;
 			r.y0 = occluders[i].y0;
 			r.x1 = occluders[i].x1;
 			r.y1 = occluders[i].y1;
 			m_DebugRects.push_back(r);
 
 			DebugBounds b;
 			b.color = r.color;
 			b.bounds = occluders[i].renderable->GetWorldBounds();
 			m_DebugBounds.push_back(b);
 		}
 	}
 
 	// Publish the occluders and casters that were marked as rendered.
 	for (size_t i = 0; i < occluders.size(); ++i)
 	{
 		if (occluders[i].rendered)
 		{
 			if (occluders[i].isPatch)
 				m_VisiblePatchOccluders.push_back(static_cast<CPatch*>(occluders[i].renderable));
 			else
 				m_VisibleModelOccluders.push_back(static_cast<CModel*>(occluders[i].renderable));
 		}
 	}
 
 	for (size_t i = 0; i < casters.size(); ++i)
 		if (casters[i].rendered)
 			m_VisibleModelCasters.push_back(casters[i].model);
 }
 
 // Submit every recorded debug sphere to the collector.
 void SilhouetteRenderer::RenderSubmitOverlays(SceneCollector& collector)
 {
 	for (auto& sphere : m_DebugSpheres)
 		collector.Submit(&sphere);
 }
 
 // Submit the visible occluders to the collector: patches first, then models
 // (models are submitted non-recursively).
 void SilhouetteRenderer::RenderSubmitOccluders(SceneCollector& collector)
 {
 	for (CPatch* patch : m_VisiblePatchOccluders)
 		collector.Submit(patch);
 
 	for (CModel* model : m_VisibleModelOccluders)
 		collector.SubmitNonRecursive(model);
 }
 
 // Submit the visible caster models to the collector, non-recursively.
 void SilhouetteRenderer::RenderSubmitCasters(SceneCollector& collector)
 {
 	for (CModel* caster : m_VisibleModelCasters)
 		collector.SubmitNonRecursive(caster);
 }
 
 // Draw the recorded occluder bounding boxes through the debug renderer.
 // The device command context is not used here.
 void SilhouetteRenderer::RenderDebugBounds(
 	Renderer::Backend::GL::CDeviceCommandContext* UNUSED(deviceCommandContext))
 {
 	if (m_DebugBounds.empty())
 		return;
 
 	for (const auto& debugBounds : m_DebugBounds)
 		g_Renderer.GetDebugRenderer().DrawBoundingBox(debugBounds.bounds, debugBounds.color, true);
 }
 
 // Draw the recorded occluder screen rectangles with the "solid" effect, in
 // line polygon mode with culling disabled. Does nothing if no rectangles
 // were recorded.
 void SilhouetteRenderer::RenderDebugOverlays(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (m_DebugRects.empty())
 		return;
 
 	// TODO: use CCanvas2D for drawing rects.
 	CMatrix3D m;
 	m.SetIdentity();
 	m.Scale(1.0f, -1.f, 1.0f);
 	m.Translate(0.0f, (float)g_yres, -1000.0f);
 
 	// Orthographic projection over the 14-bit coordinate range used by the rects.
 	CMatrix3D proj;
 	proj.SetOrtho(0.f, g_MaxCoord, 0.f, g_MaxCoord, -1.f, 1000.f);
 	m = proj * m;
 
 	CShaderTechniquePtr shaderTech = g_Renderer.GetShaderManager().LoadEffect(str_solid);
 	shaderTech->BeginPass();
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		shaderTech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	pipelineStateDesc.rasterizationState.cullMode = Renderer::Backend::CullMode::NONE;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	const CShaderProgramPtr& shader = shaderTech->GetShader();
 	// NOTE(review): the combined matrix m is computed above but proj is what
 	// gets uploaded here - confirm which one is intended.
 	shader->Uniform(str_transform, proj);
 
 	for (size_t i = 0; i < m_DebugRects.size(); ++i)
 	{
 		const DebugRect& r = m_DebugRects[i];
 		shader->Uniform(str_color, r.color);
 		// Two triangles covering the rectangle (x0,y0)-(x1,y1).
 		u16 verts[] =
 		{
 			r.x0, r.y0,
 			r.x1, r.y0,
 			r.x1, r.y1,
 			r.x0, r.y0,
 			r.x1, r.y1,
 			r.x0, r.y1,
 		};
 		// NOTE(review): verts is unsigned 16-bit but is declared as GL_SHORT;
 		// values stay below 1 << 14 so both interpretations agree - confirm.
 		shader->VertexPointer(2, GL_SHORT, 0, verts);
-		glDrawArrays(GL_TRIANGLES, 0, 6);
+		deviceCommandContext->Draw(0, 6);
 	}
 
 	shaderTech->EndPass();
 }
 
 // Discard everything submitted through AddOccluder/AddCaster. The visible
 // lists and debug data are not touched here; ComputeSubmissions clears those.
 void SilhouetteRenderer::EndFrame()
 {
 	m_SubmittedPatchOccluders.clear();
 	m_SubmittedModelOccluders.clear();
 	m_SubmittedModelCasters.clear();
 }
Index: ps/trunk/source/renderer/SkyManager.cpp
===================================================================
--- ps/trunk/source/renderer/SkyManager.cpp	(revision 26524)
+++ ps/trunk/source/renderer/SkyManager.cpp	(revision 26525)
@@ -1,336 +1,336 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/SkyManager.h"
 
 #include "graphics/LightEnv.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "lib/bits.h"
 #include "lib/tex/tex.h"
 #include "lib/timer.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/ConfigDB.h"
 #include "ps/CStr.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Filesystem.h"
 #include "ps/Game.h"
 #include "ps/Loader.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/RenderingOptions.h"
 
 #include <algorithm>
 
 // Creates the (initially empty) vertex array for the sky cube and reads the
 // "showsky" config value into m_RenderSky.
 SkyManager::SkyManager()
 	: m_VertexArray(Renderer::Backend::GL::CBuffer::Type::VERTEX, false)
 {
 	CFG_GET_VAL("showsky", m_RenderSky);
 }
 
 // Load the face images of the current sky set and upload them into a cube map
 // texture (m_SkyCubeMap). Does nothing if the cube map already exists. On any
 // load or validation failure an error is logged and the function returns
 // without creating the cube map.
 void SkyManager::LoadAndUploadSkyTexturesIfNeeded(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (m_SkyCubeMap)
 		return;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Load Sky Textures");
 	// File base names, one per uploaded face. "top" appears twice and there is
 	// no separate bottom image.
 	static const CStrW images[NUMBER_OF_TEXTURES + 1] = {
 		L"front",
 		L"back",
 		L"top",
 		L"top",
 		L"right",
 		L"left"
 	};
 
 	/*for (size_t i = 0; i < ARRAY_SIZE(m_SkyTexture); ++i)
 	{
 		VfsPath path = VfsPath("art/textures/skies") / m_SkySet / (Path::String(s_imageNames[i])+L".dds");
 
 		CTextureProperties textureProps(path);
 		textureProps.SetWrap(GL_CLAMP_TO_EDGE);
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_SkyTexture[i] = texture;
 	}*/
 
 	///////////////////////////////////////////////////////////////////////////
 	// HACK: THE HORRIBLENESS HERE IS OVER 9000. The following code is a HUGE hack and will be removed completely
 	// as soon as all the hardcoded GL_TEXTURE_2D references are corrected in the TextureManager/OGL/tex libs.
 
 	Tex textures[NUMBER_OF_TEXTURES + 1];
 
 	for (size_t i = 0; i < NUMBER_OF_TEXTURES + 1; ++i)
 	{
 		VfsPath path = VfsPath("art/textures/skies") / m_SkySet / (Path::String(images[i]) + L".dds");
 
 		std::shared_ptr<u8> file;
 		size_t fileSize;
 		if (g_VFS->LoadFile(path, file, fileSize) != INFO::OK)
 		{
 			// Fall back to the ".dds.cached.dds" variant of the same image.
 			path = VfsPath("art/textures/skies") / m_SkySet / (Path::String(images[i]) + L".dds.cached.dds");
 			if (g_VFS->LoadFile(path, file, fileSize) != INFO::OK)
 			{
 				LOGERROR("Error creating sky cubemap '%s', can't load file: '%s'.", m_SkySet.ToUTF8().c_str(), path.string8().c_str());
 				return;
 			}
 		}
 
 		// NOTE(review): the results of decode() and transform_to() are not checked here.
 		textures[i].decode(file, fileSize);
 		// Request bottom-up rows with an alpha channel, and strip DXT compression and mipmaps.
 		textures[i].transform_to((textures[i].m_Flags | TEX_BOTTOM_UP | TEX_ALPHA) & ~(TEX_DXT | TEX_MIPMAPS));
 
 		if (!is_pow2(textures[i].m_Width) || !is_pow2(textures[i].m_Height))
 		{
 			LOGERROR("Error creating sky cubemap '%s', cube textures should have power of 2 sizes.", m_SkySet.ToUTF8().c_str());
 			return;
 		}
 
 		// Every face must match the size of the first one.
 		if (textures[i].m_Width != textures[0].m_Width || textures[i].m_Height != textures[0].m_Height)
 		{
 			LOGERROR("Error creating sky cubemap '%s', cube textures have different sizes.", m_SkySet.ToUTF8().c_str());
 			return;
 		}
 	}
 
 	m_SkyCubeMap = g_VideoMode.GetBackendDevice()->CreateTexture("SkyCubeMap",
 		Renderer::Backend::GL::CTexture::Type::TEXTURE_CUBE,
 		Renderer::Backend::Format::R8G8B8A8, textures[0].m_Width, textures[0].m_Height,
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE), 1, 1);
 
 	// Scratch buffer reused for the rotated faces.
 	std::vector<u8> rotated;
 	for (size_t i = 0; i < NUMBER_OF_TEXTURES + 1; ++i)
 	{
 		u8* data = textures[i].get_data();
 
 		// We need to rotate the side if it's looking up or down.
 		// TODO: maybe it should be done during texture conversion.
 		if (i == 2 || i == 3)
 		{
 			rotated.resize(textures[i].m_DataSize);
 
 			// NOTE(review): the index arithmetic below mixes width and height as
 			// if the face were square; nothing above checks width == height - confirm.
 			for (size_t y = 0; y < textures[i].m_Height; ++y)
 			{
 				for (size_t x = 0; x < textures[i].m_Width; ++x)
 				{
 					const size_t invX = y;
 					const size_t invY = textures[i].m_Width - x - 1;
 
 					// Copy the four bytes of the texel.
 					rotated[(y * textures[i].m_Width + x) * 4 + 0] = data[(invY * textures[i].m_Width + invX) * 4 + 0];
 					rotated[(y * textures[i].m_Width + x) * 4 + 1] = data[(invY * textures[i].m_Width + invX) * 4 + 1];
 					rotated[(y * textures[i].m_Width + x) * 4 + 2] = data[(invY * textures[i].m_Width + invX) * 4 + 2];
 					rotated[(y * textures[i].m_Width + x) * 4 + 3] = data[(invY * textures[i].m_Width + invX) * 4 + 3];
 				}
 			}
 
 			deviceCommandContext->UploadTexture(
 				m_SkyCubeMap.get(), Renderer::Backend::Format::R8G8B8A8,
 				&rotated[0], textures[i].m_DataSize, 0, i);
 		}
 		else
 		{
 			deviceCommandContext->UploadTexture(
 				m_SkyCubeMap.get(), Renderer::Backend::Format::R8G8B8A8,
 				data, textures[i].m_DataSize, 0, i);
 		}
 	}
 	///////////////////////////////////////////////////////////////////////////
 }
 
 // Select a different sky set. When the name actually changes, the current
 // cube map is released so that it gets reloaded for the new set.
 void SkyManager::SetSkySet(const CStrW& newSet)
 {
 	const bool unchanged = (newSet == m_SkySet);
 	if (!unchanged)
 	{
 		m_SkyCubeMap.reset();
 		m_SkySet = newSet;
 	}
 }
 
 std::vector<CStrW> SkyManager::GetSkySets() const
 {
 	std::vector<CStrW> skies;
 
 	// Find all subdirectories in art/textures/skies
 
 	const VfsPath path(L"art/textures/skies/");
 	DirectoryNames subdirectories;
 	if (g_VFS->GetDirectoryEntries(path, 0, &subdirectories) != INFO::OK)
 	{
 		LOGERROR("Error opening directory '%s'", path.string8());
 		return std::vector<CStrW>(1, GetSkySet()); // just return what we currently have
 	}
 
 	for(size_t i = 0; i < subdirectories.size(); i++)
 		skies.push_back(subdirectories[i].string());
 	sort(skies.begin(), skies.end());
 
 	return skies;
 }
 
 // Draw the sky cube around the view camera using the sky_simple effect.
 // Not implemented on GLES builds. Does nothing when sky rendering is disabled,
 // no sky set has been chosen, or the cube map has not been loaded.
 void SkyManager::RenderSky(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render sky");
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 #warning TODO: implement SkyManager::RenderSky for GLES
 #else
 	if (!m_RenderSky)
 		return;
 
 	// Do nothing unless SetSkySet was called
 	if (m_SkySet.empty() || !m_SkyCubeMap)
 		return;
 
 	// Build the cube geometry on first use.
 	if (m_VertexArray.GetNumberOfVertices() == 0)
 		CreateSkyCube();
 
 	const CCamera& camera = g_Renderer.GetSceneRenderer().GetViewCamera();
 
 	CShaderTechniquePtr skytech =
 		g_Renderer.GetShaderManager().LoadEffect(str_sky_simple);
 	skytech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		skytech->GetGraphicsPipelineStateDesc());
 	const CShaderProgramPtr& shader = skytech->GetShader();
 	shader->BindTexture(str_baseTex, m_SkyCubeMap.get());
 
 	// Translate so the sky center is at the camera space origin.
 	CMatrix3D translate;
 	translate.SetTranslation(camera.GetOrientation().GetTranslation());
 
 	// Currently we have a hardcoded near plane in the projection matrix.
 	CMatrix3D scale;
 	scale.SetScaling(10.0f, 10.0f, 10.0f);
 
 	// Rotate so that the "left" face, which contains the brightest part of
 	// each skymap, is in the direction of the sun from our light
 	// environment.
 	CMatrix3D rotate;
 	rotate.SetYRotation(M_PI + g_Renderer.GetSceneRenderer().GetLightEnv().GetRotation());
 
 	// Applied to a vertex right-to-left: scale, rotate, translate, then view-projection.
 	shader->Uniform(
 		str_transform,
 		camera.GetViewProjection() * translate * rotate * scale);
 
 	m_VertexArray.PrepareForRendering();
 
 	u8* base = m_VertexArray.Bind(deviceCommandContext);
 	const GLsizei stride = static_cast<GLsizei>(m_VertexArray.GetStride());
 
 	// Position and UV are both 3-component float attributes (see CreateSkyCube).
 	shader->VertexPointer(
 		3, GL_FLOAT, stride, base + m_AttributePosition.offset);
 	shader->TexCoordPointer(
 		GL_TEXTURE0, 3, GL_FLOAT, stride, base + m_AttributeUV.offset);
 	shader->AssertPointersBound();
 
-	glDrawArrays(GL_TRIANGLES, 0, m_VertexArray.GetNumberOfVertices());
+	deviceCommandContext->Draw(0, m_VertexArray.GetNumberOfVertices());
 
 	skytech->EndPass();
 #endif
 }
 
 // Build the sky cube vertex array: 36 vertices (6 faces x 2 triangles), each
 // with a 3-float position and a 3-float UV attribute, then upload it and free
 // the backing store.
 void SkyManager::CreateSkyCube()
 {
 	m_AttributePosition.type = GL_FLOAT;
 	m_AttributePosition.elems = 3;
 	m_VertexArray.AddAttribute(&m_AttributePosition);
 
 	m_AttributeUV.type = GL_FLOAT;
 	m_AttributeUV.elems = 3;
 	m_VertexArray.AddAttribute(&m_AttributeUV);
 
 	// 6 sides of cube with 6 vertices.
 	m_VertexArray.SetNumberOfVertices(6 * 6);
 	m_VertexArray.Layout();
 
 	VertexArrayIterator<CVector3D> attrPosition = m_AttributePosition.GetIterator<CVector3D>();
 	VertexArrayIterator<CVector3D> attrUV = m_AttributeUV.GetIterator<CVector3D>();
 
 // Writes one vertex: UV (U, V, W) and position (VX, VY, VZ), then advances
 // both iterators. Note the argument order: UV first, position second.
 #define ADD_VERTEX(U, V, W, VX, VY, VZ) \
 	STMT( \
 		attrPosition->X = VX; \
 		attrPosition->Y = VY; \
 		attrPosition->Z = VZ; \
 		++attrPosition; \
 		attrUV->X = U; \
 		attrUV->Y = V; \
 		attrUV->Z = W; \
 		++attrUV;)
 
 	// Axis -X
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 
 	// Axis +X
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 
 	// Axis -Y
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 
 	// Axis +Y
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 
 	// Axis -Z
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, +1, +1, -1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, +1, +1, +1.0f, -1.0f, -1.0f);
 	ADD_VERTEX(+1, -1, +1, -1.0f, +1.0f, -1.0f);
 	ADD_VERTEX(-1, -1, +1, +1.0f, +1.0f, -1.0f);
 
 	// Axis +Z
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, +1, -1, +1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, +1, -1, -1.0f, -1.0f, +1.0f);
 	ADD_VERTEX(-1, -1, -1, +1.0f, +1.0f, +1.0f);
 	ADD_VERTEX(+1, -1, -1, -1.0f, +1.0f, +1.0f);
 #undef ADD_VERTEX
 
 	m_VertexArray.Upload();
 	m_VertexArray.FreeBackingStore();
 }
Index: ps/trunk/source/renderer/TerrainOverlay.cpp
===================================================================
--- ps/trunk/source/renderer/TerrainOverlay.cpp	(revision 26524)
+++ ps/trunk/source/renderer/TerrainOverlay.cpp	(revision 26525)
@@ -1,394 +1,394 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "TerrainOverlay.h"
 
 #include "graphics/Color.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/ShaderProgram.h"
 #include "graphics/Terrain.h"
 #include "lib/bits.h"
 #include "lib/ogl.h"
 #include "maths/MathUtil.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "ps/World.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/TerrainRenderer.h"
 #include "simulation2/system/SimContext.h"
 
 #include <algorithm>
 
 // Global overlay list management:
 
 // Every live ITerrainOverlay paired with its priority. The ITerrainOverlay
 // constructor appends to this list and re-sorts it by ascending priority;
 // the destructor removes the matching entry.
 static std::vector<std::pair<ITerrainOverlay*, int> > g_TerrainOverlayList;
 
 ITerrainOverlay::ITerrainOverlay(int priority)
 {
 	using OverlayEntry = std::pair<ITerrainOverlay*, int>;
 
 	// Orders entries by ascending priority only; the overlay pointer is ignored.
 	const auto hasLowerPriority = [](const OverlayEntry& lhs, const OverlayEntry& rhs)
 	{
 		return lhs.second < rhs.second;
 	};
 
 	// Register this overlay in the global list.
 	g_TerrainOverlayList.emplace_back(this, priority);
 
 	// Keep the list ordered by priority. The sort is stable so that adding or
 	// removing overlays doesn't reshuffle the existing ones (which would be
 	// noticeable if they have the same priority and overlap).
 	std::stable_sort(
 		g_TerrainOverlayList.begin(), g_TerrainOverlayList.end(), hasLowerPriority);
 }
 
 ITerrainOverlay::~ITerrainOverlay()
 {
 	// Matches every list entry that refers to the overlay being destroyed.
 	const auto refersToThis = [this](const std::pair<ITerrainOverlay*, int>& entry)
 	{
 		return entry.first == this;
 	};
 
 	// Erase-remove: compact the surviving entries, then drop the tail.
 	g_TerrainOverlayList.erase(
 		std::remove_if(g_TerrainOverlayList.begin(), g_TerrainOverlayList.end(), refersToThis),
 		g_TerrainOverlayList.end());
 }
 
 
 void ITerrainOverlay::RenderOverlaysBeforeWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	// Nothing registered: skip the profiling scope and the GPU label entirely.
 	if (g_TerrainOverlayList.empty())
 		return;
 
 	PROFILE3_GPU("terrain overlays (before)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain overlays before water");
 
 	// Walk the list in its stored order. The walk is index based and the size
 	// is re-read before every step.
 	size_t index = 0;
 	while (index < g_TerrainOverlayList.size())
 	{
 		g_TerrainOverlayList[index].first->RenderBeforeWater(deviceCommandContext);
 		++index;
 	}
 }
 
 void ITerrainOverlay::RenderOverlaysAfterWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, int cullGroup)
 {
 	// Nothing registered: skip the profiling scope and the GPU label entirely.
 	if (g_TerrainOverlayList.empty())
 		return;
 
 	PROFILE3_GPU("terrain overlays (after)");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain overlays after water");
 
 	// Walk the list in its stored order. The walk is index based and the size
 	// is re-read before every step.
 	size_t index = 0;
 	while (index < g_TerrainOverlayList.size())
 	{
 		g_TerrainOverlayList[index].first->RenderAfterWater(deviceCommandContext, cullGroup);
 		++index;
 	}
 }
 
 //////////////////////////////////////////////////////////////////////////
 
 // Registers the overlay with the given priority (through the ITerrainOverlay
 // base constructor) and stores a pointer to the terrain of simContext.
 TerrainOverlay::TerrainOverlay(const CSimContext& simContext, int priority /* = 100 */)
 	: ITerrainOverlay(priority), m_Terrain(&simContext.GetTerrain())
 {
 }
 
 // Called by RenderBeforeWater before any tile is processed.
 // The implementation in this class is intentionally empty.
 void TerrainOverlay::StartRender()
 {
 }
 
 // Called by RenderBeforeWater after the last tile has been processed.
 // The implementation in this class is intentionally empty.
 void TerrainOverlay::EndRender()
 {
 }
 
 void TerrainOverlay::GetTileExtents(
 	ssize_t& min_i_inclusive, ssize_t& min_j_inclusive,
 	ssize_t& max_i_inclusive, ssize_t& max_j_inclusive)
 {
 	// The default extents cover the whole map: tiles 0 .. tilesPerSide-1
 	// (inclusive) along both i and j.
 	min_i_inclusive = 0;
 	min_j_inclusive = 0;
 
 	const ssize_t lastTile = m_Terrain->GetTilesPerSide() - 1;
 	max_i_inclusive = lastTile;
 	max_j_inclusive = lastTile;
 }
 
 // Renders the overlay tile by tile: calls StartRender, then ProcessTile for
 // every tile inside the clamped extents reported by GetTileExtents, then
 // EndRender. Under CONFIG2_GLES nothing is rendered (not implemented yet).
 void TerrainOverlay::RenderBeforeWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (!m_Terrain)
 		return; // should never happen, but let's play it safe
 
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 #warning TODO: implement TerrainOverlay::RenderOverlays for GLES
 #else
 	StartRender();
 
 	ssize_t min_i, min_j, max_i, max_j;
 	GetTileExtents(min_i, min_j, max_i, max_j);
 	// Clamp the min to 0, but the max to -1 - so tile -1 can never be rendered,
 	// but if unclamped_max<0 then no tiles at all will be rendered. And the same
 	// for the upper limit.
 	min_i = Clamp<ssize_t>(min_i, 0, m_Terrain->GetTilesPerSide());
 	min_j = Clamp<ssize_t>(min_j, 0, m_Terrain->GetTilesPerSide());
 	max_i = Clamp<ssize_t>(max_i, -1, m_Terrain->GetTilesPerSide()-1);
 	max_j = Clamp<ssize_t>(max_j, -1, m_Terrain->GetTilesPerSide()-1);
 
 	// The loop counters are the members m_i/m_j on purpose: the RenderTile and
 	// RenderTileOutline overloads without explicit coordinates read them to
 	// know which tile is currently being processed. j is the outer loop, i the
 	// inner one.
 	for (m_j = min_j; m_j <= max_j; ++m_j)
 		for (m_i = min_i; m_i <= max_i; ++m_i)
 			ProcessTile(deviceCommandContext, m_i, m_j);
 
 	EndRender();
 #endif
 }
 
 // Convenience overload: renders the tile currently being processed, i.e. the
 // one at (m_i, m_j) as set by the loop in RenderBeforeWater.
 void TerrainOverlay::RenderTile(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CColor& color, bool drawHidden)
 {
 	RenderTile(deviceCommandContext, color, drawHidden, m_i, m_j);
 }
 
 // Draws tile (i, j) as two filled, alpha-blended triangles in the given color
 // using the debug_line effect.
 // drawHidden: when true the depth test is disabled and culling is turned off;
 // otherwise the depth test is enabled and back faces are culled.
 // Under CONFIG2_GLES nothing is rendered (not implemented yet).
 void TerrainOverlay::RenderTile(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CColor& color, bool drawHidden, ssize_t i, ssize_t j)
 {
 	// TODO: unnecessary computation calls have been removed, but we should use
 	// a vertex buffer or a vertex shader with a texture.
 	// Not sure if it's possible on old OpenGL.
 
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 	UNUSED2(color);
 	UNUSED2(drawHidden);
 	UNUSED2(i);
 	UNUSED2(j);
 	#warning TODO: implement TerrainOverlay::RenderTile for GLES
 #else
 
 	// Positions of the four tile corners, indexed as pos[di][dj] for the
 	// corner at (i + di, j + dj).
 	CVector3D pos[2][2];
 	for (int di = 0; di < 2; ++di)
 		for (int dj = 0; dj < 2; ++dj)
 			m_Terrain->CalcPosition(i + di, j + dj, pos[di][dj]);
 
 	// Flat list of floats, three (X, Y, Z) per vertex.
 	std::vector<float> vertices;
 #define ADD(position) \
 	vertices.emplace_back((position).X); \
 	vertices.emplace_back((position).Y); \
 	vertices.emplace_back((position).Z);
 
 	// Split the quad into two triangles along the diagonal selected by the
 	// terrain's triangulation direction for this tile.
 	if (m_Terrain->GetTriangulationDir(i, j))
 	{
 		ADD(pos[0][0]);
 		ADD(pos[1][0]);
 		ADD(pos[0][1]);
 
 		ADD(pos[1][0]);
 		ADD(pos[1][1]);
 		ADD(pos[0][1]);
 	}
 	else
 	{
 		ADD(pos[0][0]);
 		ADD(pos[1][0]);
 		ADD(pos[1][1]);
 
 		ADD(pos[1][1]);
 		ADD(pos[0][1]);
 		ADD(pos[0][0]);
 	}
 #undef ADD
 
 	// Start from the effect's own pipeline state and override depth test,
 	// blending (SRC_ALPHA / ONE_MINUS_SRC_ALPHA, ADD), culling and depth bias.
 	CShaderTechniquePtr overlayTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_debug_line);
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		overlayTech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthTestEnabled = !drawHidden;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	pipelineStateDesc.rasterizationState.cullMode =
 		drawHidden ? Renderer::Backend::CullMode::NONE : Renderer::Backend::CullMode::BACK;
 	// To ensure that outlines are drawn on top of the terrain correctly (and
 	// don't Z-fight and flicker nastily), use depth bias to pull them towards
 	// the camera.
 	pipelineStateDesc.rasterizationState.depthBiasEnabled = true;
 	pipelineStateDesc.rasterizationState.depthBiasConstantFactor = -1.0f;
 	pipelineStateDesc.rasterizationState.depthBiasSlopeFactor = -1.0f;
 	overlayTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	CShaderProgramPtr overlayShader = overlayTech->GetShader();
 
 	overlayShader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	overlayShader->Uniform(str_color, color);
 
 	// The vertex data is read straight from the local vector (no vertex buffer).
 	overlayShader->VertexPointer(3, GL_FLOAT, 0, vertices.data());
 	overlayShader->AssertPointersBound();
 
 	// vertices holds three floats per vertex, hence size() / 3 vertices.
-	glDrawArrays(GL_TRIANGLES, 0, vertices.size() / 3);
+	deviceCommandContext->Draw(0, vertices.size() / 3);
 
 	overlayTech->EndPass();
 #endif
 }
 
 // Convenience overload: outlines the tile currently being processed, i.e. the
 // one at (m_i, m_j) as set by the loop in RenderBeforeWater.
 void TerrainOverlay::RenderTileOutline(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CColor& color, bool drawHidden)
 {
 	RenderTileOutline(deviceCommandContext, color, drawHidden, m_i, m_j);
 }
 
 // Draws tile (i, j) as two alpha-blended triangles rasterized with
 // PolygonMode::LINE, in the given color, using the debug_line effect.
 // drawHidden: when true the depth test is disabled and culling is turned off;
 // otherwise the depth test is enabled and back faces are culled.
 // Under CONFIG2_GLES nothing is rendered (not implemented yet).
 void TerrainOverlay::RenderTileOutline(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CColor& color, bool drawHidden, ssize_t i, ssize_t j)
 {
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 	UNUSED2(color);
 	UNUSED2(drawHidden);
 	UNUSED2(i);
 	UNUSED2(j);
 	#warning TODO: implement TerrainOverlay::RenderTileOutline for GLES
 #else
 
 	// Flat list of floats, three (X, Y, Z) per vertex.
 	std::vector<float> vertices;
 	// ADD computes the terrain position of a vertex into the local `position`
 	// (declared below, before the first use) and appends its coordinates.
 #define ADD(i, j) \
 	m_Terrain->CalcPosition(i, j, position); \
 	vertices.emplace_back(position.X); \
 	vertices.emplace_back(position.Y); \
 	vertices.emplace_back(position.Z);
 
 	// Two triangles sharing the (i, j) - (i + 1, j + 1) diagonal.
 	CVector3D position;
 	ADD(i, j);
 	ADD(i + 1, j);
 	ADD(i + 1, j + 1);
 	ADD(i, j);
 	ADD(i + 1, j + 1);
 	ADD(i, j + 1);
 #undef ADD
 
 	// Start from the effect's own pipeline state and override depth test,
 	// blending (SRC_ALPHA / ONE_MINUS_SRC_ALPHA, ADD), culling and polygon mode.
 	CShaderTechniquePtr overlayTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_debug_line);
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		overlayTech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthTestEnabled = !drawHidden;
 	pipelineStateDesc.blendState.enabled = true;
 	pipelineStateDesc.blendState.srcColorBlendFactor = pipelineStateDesc.blendState.srcAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::SRC_ALPHA;
 	pipelineStateDesc.blendState.dstColorBlendFactor = pipelineStateDesc.blendState.dstAlphaBlendFactor =
 		Renderer::Backend::BlendFactor::ONE_MINUS_SRC_ALPHA;
 	pipelineStateDesc.blendState.colorBlendOp = pipelineStateDesc.blendState.alphaBlendOp =
 		Renderer::Backend::BlendOp::ADD;
 	pipelineStateDesc.rasterizationState.cullMode =
 		drawHidden ? Renderer::Backend::CullMode::NONE : Renderer::Backend::CullMode::BACK;
 	pipelineStateDesc.rasterizationState.polygonMode = Renderer::Backend::PolygonMode::LINE;
 	overlayTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 
 	const CShaderProgramPtr& overlayShader = overlayTech->GetShader();
 
 	overlayShader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	overlayShader->Uniform(str_color, color);
 
 	// The vertex data is read straight from the local vector (no vertex buffer).
 	overlayShader->VertexPointer(3, GL_FLOAT, 0, vertices.data());
 	overlayShader->AssertPointersBound();
 
 	// vertices holds three floats per vertex, hence size() / 3 vertices.
-	glDrawArrays(GL_TRIANGLES, 0, vertices.size() / 3);
+	deviceCommandContext->Draw(0, vertices.size() / 3);
 
 	overlayTech->EndPass();
 #endif
 }
 
 //////////////////////////////////////////////////////////////////////////
 
 // Registers the overlay with the given priority (through the ITerrainOverlay
 // base constructor) and records how many overlay texels cover one terrain
 // tile along each axis.
 TerrainTextureOverlay::TerrainTextureOverlay(float texelsPerTile, int priority) :
 	ITerrainOverlay(priority), m_TexelsPerTile(texelsPerTile)
 {
 }
 
 // Defaulted, but defined out of line in this translation unit.
 // NOTE(review): presumably so the header does not need the complete texture
 // type for m_Texture - confirm against TerrainOverlay.h.
 TerrainTextureOverlay::~TerrainTextureOverlay() = default;
 
 // Rebuilds the overlay image on every call, uploads it into a texture whose
 // sides are rounded up to a power of two, and asks the terrain renderer to
 // draw that texture over the visible patches of cullGroup.
 void TerrainTextureOverlay::RenderAfterWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext, int cullGroup)
 {
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 
 	// Size of the overlay image in texels (same expression for both sides).
 	ssize_t w = (ssize_t)(terrain->GetTilesPerSide() * m_TexelsPerTile);
 	ssize_t h = (ssize_t)(terrain->GetTilesPerSide() * m_TexelsPerTile);
 
 	const uint32_t requiredWidth = round_up_to_pow2(w);
 	const uint32_t requiredHeight = round_up_to_pow2(h);
 
 	// Recreate the texture with new size if necessary
 	if (!m_Texture || m_Texture->GetWidth() != requiredWidth || m_Texture->GetHeight() != requiredHeight)
 	{
 		m_Texture = deviceCommandContext->GetDevice()->CreateTexture2D("TerrainOverlayTexture",
 			Renderer::Backend::Format::R8G8B8A8, requiredWidth, requiredHeight,
 			Renderer::Backend::Sampler::MakeDefaultSampler(
 				Renderer::Backend::Sampler::Filter::NEAREST,
 				Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
 	}
 
 	// Zero-initialised RGBA staging buffer, 4 bytes per texel. It is owned by
 	// a vector so that it is released on every exit path, and so that an
 	// allocation failure is reported by the allocator instead of silently
 	// handing a null pointer to BuildTextureRGBA.
 	std::vector<u8> data(static_cast<size_t>(w) * static_cast<size_t>(h) * 4);
 	BuildTextureRGBA(data.data(), w, h);
 
 	// Only the w x h region at the origin is filled; the texture itself may be
 	// larger because of the power-of-two rounding above.
 	deviceCommandContext->UploadTextureRegion(
 		m_Texture.get(), Renderer::Backend::Format::R8G8B8A8, data.data(), w * h * 4, 0, 0, w, h);
 
 	// Texture matrix handed to the terrain renderer. The scale compensates for
 	// the texture being larger than the w x h image.
 	// NOTE(review): presumably _11/_23 map world-space X/Z to texture u/v -
 	// confirm against the debug_overlay shader.
 	CMatrix3D matrix;
 	matrix.SetZero();
 	matrix._11 = m_TexelsPerTile / (m_Texture->GetWidth() * TERRAIN_TILE_SIZE);
 	matrix._23 = m_TexelsPerTile / (m_Texture->GetHeight() * TERRAIN_TILE_SIZE);
 	matrix._44 = 1;
 
 	g_Renderer.GetSceneRenderer().GetTerrainRenderer().RenderTerrainOverlayTexture(
 		deviceCommandContext, cullGroup, matrix, m_Texture.get());
 }
 
 SColor4ub TerrainTextureOverlay::GetColor(size_t idx, u8 alpha) const
 {
 	// Fixed palette of RGB triples; indices beyond its end wrap around.
 	static u8 colors[][3] =
 	{
 		// Fully saturated primaries, secondaries and white.
 		{ 255, 0, 0 },
 		{ 0, 255, 0 },
 		{ 0, 0, 255 },
 		{ 255, 255, 0 },
 		{ 255, 0, 255 },
 		{ 0, 255, 255 },
 		{ 255, 255, 255 },
 
 		// The same set at half intensity.
 		{ 127, 0, 0 },
 		{ 0, 127, 0 },
 		{ 0, 0, 127 },
 		{ 127, 127, 0 },
 		{ 127, 0, 127 },
 		{ 0, 127, 127 },
 		{ 127, 127, 127 },
 
 		// Mixes of full, half and zero intensity.
 		{ 255, 127, 0 },
 		{ 127, 255, 0 },
 		{ 255, 0, 127 },
 		{ 127, 0, 255 },
 		{ 0, 255, 127 },
 		{ 0, 127, 255 },
 		{ 255, 127, 127 },
 		{ 127, 255, 127 },
 		{ 127, 127, 255 },
 
 		{ 127, 255, 255 },
 		{ 255, 127, 255 },
 		{ 255, 255, 127 },
 	};
 
 	const u8 (&rgb)[3] = colors[idx % ARRAY_SIZE(colors)];
 	return SColor4ub(rgb[0], rgb[1], rgb[2], alpha);
 }
Index: ps/trunk/source/renderer/TerrainRenderer.cpp
===================================================================
--- ps/trunk/source/renderer/TerrainRenderer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/TerrainRenderer.cpp	(revision 26525)
@@ -1,627 +1,627 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "renderer/TerrainRenderer.h"
 
 #include "graphics/Camera.h"
 #include "graphics/Canvas2D.h"
 #include "graphics/Decal.h"
 #include "graphics/GameView.h"
 #include "graphics/LightEnv.h"
 #include "graphics/LOSTexture.h"
 #include "graphics/Patch.h"
 #include "graphics/Model.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/TerritoryTexture.h"
 #include "graphics/TextRenderer.h"
 #include "graphics/TextureManager.h"
 #include "maths/MathUtil.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Filesystem.h"
 #include "ps/Game.h"
 #include "ps/Profile.h"
 #include "ps/World.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/DecalRData.h"
 #include "renderer/PatchRData.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/ShadowMap.h"
 #include "renderer/SkyManager.h"
 #include "renderer/VertexArray.h"
 #include "renderer/WaterManager.h"
 
 /**
  * TerrainRenderer keeps track of which phase it is in, to detect
  * when Submit, PrepareForRendering etc. are called in the wrong order.
  */
 enum Phase
 {
 	/// Patches and decals may be submitted. Entered at construction and by EndFrame.
 	Phase_Submit,
 	/// Entered by PrepareForRendering; asserted by the render functions that check the phase.
 	Phase_Render
 };
 
 
 /**
  * Struct TerrainRendererInternals: Internal variables used by the TerrainRenderer class.
  */
 struct TerrainRendererInternals
 {
 	/// Which phase (submitting or rendering patches) are we in right now?
 	Phase phase;
 
 	/// Patches that were submitted for this frame (one list per cull group)
 	std::vector<CPatchRData*> visiblePatches[CSceneRenderer::CULL_MAX];
 
 	/// Decals that were submitted for this frame (one list per cull group)
 	std::vector<CDecalRData*> visibleDecals[CSceneRenderer::CULL_MAX];
 
 	/// Fancy water shader (loaded on demand by RenderFancyWater)
 	CShaderTechniquePtr fancyWaterTech;
 
 	/// Simulation set through SetSimulation; handed to the patch and decal
 	/// render data when they are created and updated in Submit.
 	CSimulation2* simulation;
 };
 
 
 
 ///////////////////////////////////////////////////////////////////
 // Construction/Destruction
 // Allocates the internal state (owned through the raw pointer m, released in
 // the destructor) and starts in the submit phase.
 TerrainRenderer::TerrainRenderer()
 {
 	m = new TerrainRendererInternals();
 	m->phase = Phase_Submit;
 }
 
 // Releases the internal state allocated by the constructor.
 TerrainRenderer::~TerrainRenderer()
 {
 	delete m;
 }
 
 // Stores the simulation pointer that Submit later passes to the patch and
 // decal render data. The pointer is stored as is; no ownership is taken here.
 void TerrainRenderer::SetSimulation(CSimulation2* simulation)
 {
 	m->simulation = simulation;
 }
 
 ///////////////////////////////////////////////////////////////////
 // Submit a patch for rendering
 void TerrainRenderer::Submit(int cullGroup, CPatch* patch)
 {
 	ENSURE(m->phase == Phase_Submit);
 
 	CPatchRData* data = (CPatchRData*)patch->GetRenderData();
 	if (data == 0)
 	{
 		// no renderdata for patch, create it now
 		data = new CPatchRData(patch, m->simulation);
 		patch->SetRenderData(data);
 	}
 	data->Update(m->simulation);
 
 	m->visiblePatches[cullGroup].push_back(data);
 }
 
 ///////////////////////////////////////////////////////////////////
 // Submit a decal for rendering
 void TerrainRenderer::Submit(int cullGroup, CModelDecal* decal)
 {
 	ENSURE(m->phase == Phase_Submit);
 
 	CDecalRData* data = (CDecalRData*)decal->GetRenderData();
 	if (data == 0)
 	{
 		// no renderdata for decal, create it now
 		data = new CDecalRData(decal, m->simulation);
 		decal->SetRenderData(data);
 	}
 	data->Update(m->simulation);
 
 	m->visibleDecals[cullGroup].push_back(data);
 }
 
 ///////////////////////////////////////////////////////////////////
 // Prepare for rendering
 // Switches from the submit phase to the render phase. Must be called while in
 // the submit phase (asserted).
 void TerrainRenderer::PrepareForRendering()
 {
 	ENSURE(m->phase == Phase_Submit);
 
 	m->phase = Phase_Render;
 }
 
 ///////////////////////////////////////////////////////////////////
 // Clear submissions lists
 void TerrainRenderer::EndFrame()
 {
 	ENSURE(m->phase == Phase_Render || m->phase == Phase_Submit);
 
 	for (int i = 0; i < CSceneRenderer::CULL_MAX; ++i)
 	{
 		m->visiblePatches[i].clear();
 		m->visibleDecals[i].clear();
 	}
 
 	m->phase = Phase_Submit;
 }
 
 // Draws the given texture over the visible patches of cullGroup using the
 // debug_overlay effect, with textureMatrix passed as the textureTransform
 // uniform. If the visible patches have non-empty water bounds, one extra quad
 // at water height is drawn as well. Must be called in the render phase.
 // Under CONFIG2_GLES nothing is rendered (not implemented yet).
 void TerrainRenderer::RenderTerrainOverlayTexture(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	int cullGroup, CMatrix3D& textureMatrix,
 	Renderer::Backend::GL::CTexture* texture)
 {
 #if CONFIG2_GLES
 #warning TODO: implement TerrainRenderer::RenderTerrainOverlayTexture for GLES
 	UNUSED2(deviceCommandContext);
 	UNUSED2(cullGroup);
 	UNUSED2(textureMatrix);
 	UNUSED2(texture);
 #else
 	ENSURE(m->phase == Phase_Render);
 
 	std::vector<CPatchRData*>& visiblePatches = m->visiblePatches[cullGroup];
 
 	CShaderTechniquePtr debugOverlayTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_debug_overlay);
 	debugOverlayTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		debugOverlayTech->GetGraphicsPipelineStateDesc());
 	const CShaderProgramPtr& debugOverlayShader = debugOverlayTech->GetShader();
 
 	debugOverlayShader->BindTexture(str_baseTex, texture);
 	debugOverlayShader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	debugOverlayShader->Uniform(str_textureTransform, textureMatrix);
 	CPatchRData::RenderStreams(deviceCommandContext, visiblePatches, debugOverlayShader, STREAM_POS | STREAM_POSTOUV0);
 
 	// To make the overlay visible over water, render an additional map-sized
 	// water-height patch.
 	CBoundingBoxAligned waterBounds;
 	for (CPatchRData* data : visiblePatches)
 		waterBounds += data->GetWaterBounds();
 	if (!waterBounds.IsEmpty())
 	{
 		// Add a delta to avoid z-fighting.
 		const float height = g_Renderer.GetSceneRenderer().GetWaterManager().m_WaterHeight + 0.05f;
 		// Two triangles (six vertices) spanning the X/Z extent of the
 		// accumulated water bounds at the water height computed above.
 		const float waterPos[] =
 		{
 			waterBounds[0].X, height, waterBounds[0].Z,
 			waterBounds[1].X, height, waterBounds[0].Z,
 			waterBounds[1].X, height, waterBounds[1].Z,
 			waterBounds[0].X, height, waterBounds[0].Z,
 			waterBounds[1].X, height, waterBounds[1].Z,
 			waterBounds[0].X, height, waterBounds[1].Z
 		};
 
 		// The same array feeds both the position and the texture coordinate
 		// stream.
 		const GLsizei stride = sizeof(float) * 3;
 		debugOverlayShader->VertexPointer(3, GL_FLOAT, stride, waterPos);
 		debugOverlayShader->TexCoordPointer(GL_TEXTURE0, 3, GL_FLOAT, stride, waterPos);
 		debugOverlayShader->AssertPointersBound();
 
-		glDrawArrays(GL_TRIANGLES, 0, 6);
+		deviceCommandContext->Draw(0, 6);
 	}
 
 	debugOverlayTech->EndPass();
 #endif
 }
 
 
 ///////////////////////////////////////////////////////////////////
 
 /**
  * Set up all the uniforms for a shader pass.
  *
  * Sets the camera transform and position, the LOS texture and its transform,
  * and the ambient, sun and fog parameters of the scene's light environment.
  *
  * @param shader shader program that receives the uniforms and texture bindings.
  * @param shadow optional shadow map; when non-null it is bound to the shader.
  */
 void TerrainRenderer::PrepareShader(const CShaderProgramPtr& shader, ShadowMap* shadow)
 {
 	CSceneRenderer& sceneRenderer = g_Renderer.GetSceneRenderer();
 
 	shader->Uniform(str_transform, sceneRenderer.GetViewCamera().GetViewProjection());
 	shader->Uniform(str_cameraPos, sceneRenderer.GetViewCamera().GetOrientation().GetTranslation());
 
 	const CLightEnv& lightEnv = sceneRenderer.GetLightEnv();
 
 	if (shadow)
 		shadow->BindTo(shader);
 
 	// Only elements [0] and [12] of the LOS texture matrix are passed on; the
 	// remaining two components of the uniform are zero.
 	CLOSTexture& los = sceneRenderer.GetScene().GetLOSTexture();
 	shader->BindTexture(str_losTex, los.GetTextureSmooth());
 	shader->Uniform(str_losTransform, los.GetTextureMatrix()[0], los.GetTextureMatrix()[12], 0.f, 0.f);
 
 	shader->Uniform(str_ambient, lightEnv.m_AmbientColor);
 	shader->Uniform(str_sunColor, lightEnv.m_SunColor);
 	shader->Uniform(str_sunDir, lightEnv.GetSunDir());
 
 	shader->Uniform(str_fogColor, lightEnv.m_FogColor);
 	shader->Uniform(str_fogParams, lightEnv.m_FogFactor, lightEnv.m_FogMax, 0.f, 0.f);
 }
 
 // Renders the terrain of cullGroup in this order: solid black patch sides,
 // patch bases, patch blends, then decals. Returns early when neither patches
 // nor decals were submitted for the group. Must be called in the render phase.
 void TerrainRenderer::RenderTerrainShader(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, int cullGroup, ShadowMap* shadow)
 {
 	ENSURE(m->phase == Phase_Render);
 
 	std::vector<CPatchRData*>& visiblePatches = m->visiblePatches[cullGroup];
 	std::vector<CDecalRData*>& visibleDecals = m->visibleDecals[cullGroup];
 	if (visiblePatches.empty() && visibleDecals.empty())
 		return;
 
 	// render the solid black sides of the map first
 	CShaderTechniquePtr techSolid = g_Renderer.GetShaderManager().LoadEffect(str_solid);
 	techSolid->BeginPass();
 	// Use the effect's pipeline state, but with culling turned off.
 	Renderer::Backend::GraphicsPipelineStateDesc solidPipelineStateDesc =
 		techSolid->GetGraphicsPipelineStateDesc();
 	solidPipelineStateDesc.rasterizationState.cullMode = Renderer::Backend::CullMode::NONE;
 	deviceCommandContext->SetGraphicsPipelineState(solidPipelineStateDesc);
 
 	const CShaderProgramPtr& shaderSolid = techSolid->GetShader();
 	shaderSolid->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	shaderSolid->Uniform(str_color, 0.0f, 0.0f, 0.0f, 1.0f);
 
 	CPatchRData::RenderSides(deviceCommandContext, visiblePatches, shaderSolid);
 
 	techSolid->EndPass();
 
 	CPatchRData::RenderBases(deviceCommandContext, visiblePatches, context, shadow);
 
 	// render blend passes for each patch
 	CPatchRData::RenderBlends(deviceCommandContext, visiblePatches, context, shadow);
 
 	CDecalRData::RenderDecals(deviceCommandContext, visibleDecals, context, shadow);
 
 	// restore OpenGL state
 	// (binds handle 0 as GL_TEXTURE_2D for indices 3, 2 and 1)
 	deviceCommandContext->BindTexture(3, GL_TEXTURE_2D, 0);
 	deviceCommandContext->BindTexture(2, GL_TEXTURE_2D, 0);
 	deviceCommandContext->BindTexture(1, GL_TEXTURE_2D, 0);
 }
 
 
 ///////////////////////////////////////////////////////////////////
 // Render un-textured patches as polygons
 // Draws the position stream of every visible patch of cullGroup in a single
 // color with the terrain_solid effect (loaded with the given defines).
 // Returns early when no patch was submitted for the group. Must be called in
 // the render phase. Under CONFIG2_GLES nothing is rendered (not implemented).
 void TerrainRenderer::RenderPatches(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	int cullGroup, const CShaderDefines& defines, const CColor& color)
 {
 	ENSURE(m->phase == Phase_Render);
 
 	std::vector<CPatchRData*>& visiblePatches = m->visiblePatches[cullGroup];
 	if (visiblePatches.empty())
 		return;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain patches");
 
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 	UNUSED2(defines);
 	UNUSED2(color);
 	#warning TODO: implement TerrainRenderer::RenderPatches for GLES
 #else
 
 	CShaderTechniquePtr solidTech = g_Renderer.GetShaderManager().LoadEffect(str_terrain_solid, defines);
 	solidTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		solidTech->GetGraphicsPipelineStateDesc());
 
 	const CShaderProgramPtr& solidShader = solidTech->GetShader();
 	solidShader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	solidShader->Uniform(str_color, color);
 
 	CPatchRData::RenderStreams(deviceCommandContext, visiblePatches, solidShader, STREAM_POS);
 	solidTech->EndPass();
 #endif
 }
 
 
 ///////////////////////////////////////////////////////////////////
 // Render outlines of submitted patches as lines
 void TerrainRenderer::RenderOutlines(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	int cullGroup)
 {
 	ENSURE(m->phase == Phase_Render);
 
 	// Nothing submitted for this cull group: don't even open a GPU label.
 	std::vector<CPatchRData*>& visiblePatches = m->visiblePatches[cullGroup];
 	if (visiblePatches.empty())
 		return;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render terrain outlines");
 
 	// Each patch draws its own outline, in submission order.
 	for (CPatchRData* patchData : visiblePatches)
 		patchData->RenderOutline();
 }
 
 
 ///////////////////////////////////////////////////////////////////
 // Scissor rectangle of water patches
 CBoundingBoxAligned TerrainRenderer::ScissorWater(int cullGroup, const CCamera& camera)
 {
 	// Union of the viewport-space bounds of every visible patch that has water.
 	CBoundingBoxAligned scissor;
 	for (const CPatchRData* patchData : m->visiblePatches[cullGroup])
 	{
 		const CBoundingBoxAligned& waterBounds = patchData->GetWaterBounds();
 		if (!waterBounds.IsEmpty())
 		{
 			const CBoundingBoxAligned boundsInViewPort =
 				camera.GetBoundsInViewPort(waterBounds);
 			if (!boundsInViewPort.IsEmpty())
 				scissor += boundsInViewPort;
 		}
 	}
 
 	// Clamp X and Y of both corners to [-1, 1]; Z always spans -1 .. 1.
 	const CVector3D lowerCorner(
 		Clamp(scissor[0].X, -1.0f, 1.0f), Clamp(scissor[0].Y, -1.0f, 1.0f), -1.0f);
 	const CVector3D upperCorner(
 		Clamp(scissor[1].X, -1.0f, 1.0f), Clamp(scissor[1].Y, -1.0f, 1.0f), 1.0f);
 	return CBoundingBoxAligned(lowerCorner, upperCorner);
 }
 
 // Render fancy water
 //
 // Renders the water surface (and, with fancy effects enabled, the shore) of
 // every visible patch of cullGroup with the water_high effect. The effect is
 // (re)loaded when it is missing or when the water manager requests a reload.
 // Returns false, and turns water rendering off in the water manager, when the
 // effect cannot be loaded; returns true otherwise.
 bool TerrainRenderer::RenderFancyWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, int cullGroup, ShadowMap* shadow)
 {
 	PROFILE3_GPU("fancy water");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render fancy water");
 
 	CSceneRenderer& sceneRenderer = g_Renderer.GetSceneRenderer();
 
 	WaterManager& waterManager = sceneRenderer.GetWaterManager();
 	// Local copy: the water options below are added on top of the caller's defines.
 	CShaderDefines defines = context;
 
 	// If we're using fancy water, make sure its shader is loaded
 	if (!m->fancyWaterTech || waterManager.m_NeedsReloading)
 	{
 		if (waterManager.m_WaterRealDepth)
 			defines.Add(str_USE_REAL_DEPTH, str_1);
 		if (waterManager.m_WaterFancyEffects)
 			defines.Add(str_USE_FANCY_EFFECTS, str_1);
 		if (waterManager.m_WaterRefraction)
 			defines.Add(str_USE_REFRACTION, str_1);
 		if (waterManager.m_WaterReflection)
 			defines.Add(str_USE_REFLECTION, str_1);
 
 		m->fancyWaterTech = g_Renderer.GetShaderManager().LoadEffect(str_water_high, defines);
 
 		if (!m->fancyWaterTech)
 		{
 			LOGERROR("Failed to load water shader. Falling back to a simple water.\n");
 			waterManager.m_RenderWater = false;
 			return false;
 		}
 		waterManager.m_NeedsReloading = false;
 	}
 
 	CLOSTexture& losTexture = sceneRenderer.GetScene().GetLOSTexture();
 
 	// Calculating the advanced informations about Foam and all if the quality calls for it.
 	/*if (WaterMgr->m_NeedInfoUpdate && (WaterMgr->m_WaterFoam || WaterMgr->m_WaterCoastalWaves))
 	{
 		WaterMgr->m_NeedInfoUpdate = false;
 		WaterMgr->CreateSuperfancyInfo();
 	}*/
 
 	const double time = waterManager.m_WaterTexTimer;
 	const float repeatPeriod = waterManager.m_RepeatPeriod;
 
 	m->fancyWaterTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		m->fancyWaterTech->GetGraphicsPipelineStateDesc());
 	const CShaderProgramPtr& fancyWaterShader = m->fancyWaterTech->GetShader();
 
 	const CCamera& camera = g_Renderer.GetSceneRenderer().GetViewCamera();
 
 	// Two normal maps are bound: the current one and the next one, both
 	// selected by the water manager for the given period.
 	const double period = 8.0;
 	// TODO: move uploading to a prepare function during loading.
 	const CTexturePtr& currentNormalTexture = waterManager.m_NormalMap[waterManager.GetCurrentTextureIndex(period)];
 	const CTexturePtr& nextNormalTexture = waterManager.m_NormalMap[waterManager.GetNextTextureIndex(period)];
 	currentNormalTexture->UploadBackendTextureIfNeeded(deviceCommandContext);
 	nextNormalTexture->UploadBackendTextureIfNeeded(deviceCommandContext);
 	fancyWaterShader->BindTexture(str_normalMap, currentNormalTexture->GetBackendTexture());
 	fancyWaterShader->BindTexture(str_normalMap2, nextNormalTexture->GetBackendTexture());
 
 	if (waterManager.m_WaterFancyEffects)
 	{
 		fancyWaterShader->BindTexture(str_waterEffectsTex, waterManager.m_FancyTexture.get());
 	}
 
 	// The depth texture and the inverse matrices are only bound when both
 	// refraction and real depth are enabled.
 	if (waterManager.m_WaterRefraction && waterManager.m_WaterRealDepth)
 	{
 		fancyWaterShader->BindTexture(str_depthTex, waterManager.m_RefrFboDepthTexture.get());
 		fancyWaterShader->Uniform(str_projInvTransform, waterManager.m_RefractionProjInvMatrix);
 		fancyWaterShader->Uniform(str_viewInvTransform, waterManager.m_RefractionViewInvMatrix);
 	}
 
 	if (waterManager.m_WaterRefraction)
 		fancyWaterShader->BindTexture(str_refractionMap, waterManager.m_RefractionTexture.get());
 	if (waterManager.m_WaterReflection)
 		fancyWaterShader->BindTexture(str_reflectionMap, waterManager.m_ReflectionTexture.get());
 	fancyWaterShader->BindTexture(str_losTex, losTexture.GetTextureSmooth());
 
 	const CLightEnv& lightEnv = sceneRenderer.GetLightEnv();
 
 	fancyWaterShader->Uniform(str_transform, sceneRenderer.GetViewCamera().GetViewProjection());
 
 	fancyWaterShader->BindTexture(str_skyCube, sceneRenderer.GetSkyManager().GetSkyCube());
 	// TODO: check that this rotates in the right direction.
 	CMatrix3D skyBoxRotation;
 	skyBoxRotation.SetIdentity();
 	skyBoxRotation.RotateY(M_PI + lightEnv.GetRotation());
 	fancyWaterShader->Uniform(str_skyBoxRot, skyBoxRotation);
 
 	if (waterManager.m_WaterRefraction)
 		fancyWaterShader->Uniform(str_refractionMatrix, waterManager.m_RefractionMatrix);
 	if (waterManager.m_WaterReflection)
 		fancyWaterShader->Uniform(str_reflectionMatrix, waterManager.m_ReflectionMatrix);
 
 	fancyWaterShader->Uniform(str_ambient, lightEnv.m_AmbientColor);
 	fancyWaterShader->Uniform(str_sunDir, lightEnv.GetSunDir());
 	fancyWaterShader->Uniform(str_sunColor, lightEnv.m_SunColor);
 	fancyWaterShader->Uniform(str_color, waterManager.m_WaterColor);
 	fancyWaterShader->Uniform(str_tint, waterManager.m_WaterTint);
 	fancyWaterShader->Uniform(str_waviness, waterManager.m_Waviness);
 	fancyWaterShader->Uniform(str_murkiness, waterManager.m_Murkiness);
 	fancyWaterShader->Uniform(str_windAngle, waterManager.m_WindAngle);
 	fancyWaterShader->Uniform(str_repeatScale, 1.0f / repeatPeriod);
 	fancyWaterShader->Uniform(str_losTransform, losTexture.GetTextureMatrix()[0], losTexture.GetTextureMatrix()[12], 0.f, 0.f);
 
 	fancyWaterShader->Uniform(str_cameraPos, camera.GetOrientation().GetTranslation());
 
 	fancyWaterShader->Uniform(str_fogColor, lightEnv.m_FogColor);
 	fancyWaterShader->Uniform(str_fogParams, lightEnv.m_FogFactor, lightEnv.m_FogMax, 0.f, 0.f);
 	fancyWaterShader->Uniform(str_time, (float)time);
 	fancyWaterShader->Uniform(str_screenSize, (float)g_Renderer.GetWidth(), (float)g_Renderer.GetHeight(), 0.0f, 0.0f);
 
 	// Wave parameter presets per water type; any type other than "clap" and
 	// "lake" uses the values in the final branch.
 	if (waterManager.m_WaterType == L"clap")
 	{
 		fancyWaterShader->Uniform(str_waveParams1, 30.0f,1.5f,20.0f,0.03f);
 		fancyWaterShader->Uniform(str_waveParams2, 0.5f,0.0f,0.0f,0.0f);
 	}
 	else if (waterManager.m_WaterType == L"lake")
 	{
 		fancyWaterShader->Uniform(str_waveParams1, 8.5f,1.5f,15.0f,0.03f);
 		fancyWaterShader->Uniform(str_waveParams2, 0.2f,0.0f,0.0f,0.07f);
 	}
 	else
 	{
 		fancyWaterShader->Uniform(str_waveParams1, 15.0f,0.8f,10.0f,0.1f);
 		fancyWaterShader->Uniform(str_waveParams2, 0.3f,0.0f,0.1f,0.3f);
 	}
 
 	if (shadow)
 		shadow->BindTo(fancyWaterShader);
 
 	// Per patch: the water surface first, then the shore when fancy effects
 	// are enabled.
 	for (CPatchRData* data : m->visiblePatches[cullGroup])
 	{
 		data->RenderWaterSurface(deviceCommandContext, fancyWaterShader, true);
 		if (waterManager.m_WaterFancyEffects)
 			data->RenderWaterShore(deviceCommandContext, fancyWaterShader);
 	}
 	m->fancyWaterTech->EndPass();
 
 	return true;
 }
 
 void TerrainRenderer::RenderSimpleWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	int cullGroup)
 {
 #if CONFIG2_GLES
 	UNUSED2(deviceCommandContext);
 	UNUSED2(cullGroup);
 #else
 	PROFILE3_GPU("simple water");
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render Simple Water");
 
 	const WaterManager& waterManager = g_Renderer.GetSceneRenderer().GetWaterManager();
 	CLOSTexture& losTexture = g_Game->GetView()->GetLOSTexture();
 
 	const double time = waterManager.m_WaterTexTimer;
 
 	CShaderDefines context;
 	if (g_Renderer.GetSceneRenderer().GetWaterRenderMode() == WIREFRAME)
 		context.Add(str_MODE_WIREFRAME, str_1);
 
 	CShaderTechniquePtr waterSimpleTech =
 		g_Renderer.GetShaderManager().LoadEffect(str_water_simple, context);
 	waterSimpleTech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		waterSimpleTech->GetGraphicsPipelineStateDesc());
 	const CShaderProgramPtr& waterSimpleShader = waterSimpleTech->GetShader();
 
 	const CTexturePtr& waterTexture = waterManager.m_WaterTexture[waterManager.GetCurrentTextureIndex(1.6)];
 	waterTexture->UploadBackendTextureIfNeeded(deviceCommandContext);
 	waterSimpleShader->BindTexture(str_baseTex, waterTexture->GetBackendTexture());
 	waterSimpleShader->BindTexture(str_losTex, losTexture.GetTextureSmooth());
 	waterSimpleShader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 	waterSimpleShader->Uniform(str_losTransform, losTexture.GetTextureMatrix()[0], losTexture.GetTextureMatrix()[12], 0.f, 0.f);
 	waterSimpleShader->Uniform(str_time, static_cast<float>(time));
 	waterSimpleShader->Uniform(str_color, waterManager.m_WaterColor);
 
 	std::vector<CPatchRData*>& visiblePatches = m->visiblePatches[cullGroup];
 	for (size_t i = 0; i < visiblePatches.size(); ++i)
 	{
 		CPatchRData* data = visiblePatches[i];
 		data->RenderWaterSurface(deviceCommandContext, waterSimpleShader, false);
 	}
 
 	deviceCommandContext->BindTexture(1, GL_TEXTURE_2D, 0);
 
 	waterSimpleTech->EndPass();
 #endif
 }
 
 ///////////////////////////////////////////////////////////////////
 // Render water that is part of the terrain
 void TerrainRenderer::RenderWater(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CShaderDefines& context, int cullGroup, ShadowMap* shadow)
 {
 	const WaterManager& waterManager = g_Renderer.GetSceneRenderer().GetWaterManager();
 
 	if (!waterManager.WillRenderFancyWater())
 		RenderSimpleWater(deviceCommandContext, cullGroup);
 	else
 		RenderFancyWater(deviceCommandContext, context, cullGroup, shadow);
 }
 
 void TerrainRenderer::RenderWaterFoamOccluders(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	int cullGroup)
 {
 	CSceneRenderer& sceneRenderer = g_Renderer.GetSceneRenderer();
 	const WaterManager& waterManager = sceneRenderer.GetWaterManager();
 	if (!waterManager.WillRenderFancyWater())
 		return;
 
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render water foam occluders");
 
 	// Render normals and foam to a framebuffer if we're using fancy effects.
 	deviceCommandContext->SetFramebuffer(waterManager.m_FancyEffectsFramebuffer.get());
 
 	// Overwrite waves that would be behind the ground.
 	CShaderTechniquePtr dummyTech = g_Renderer.GetShaderManager().LoadEffect(str_solid);
 	dummyTech->BeginPass();
 	Renderer::Backend::GraphicsPipelineStateDesc pipelineStateDesc =
 		dummyTech->GetGraphicsPipelineStateDesc();
 	pipelineStateDesc.depthStencilState.depthTestEnabled = true;
 	pipelineStateDesc.rasterizationState.cullMode = Renderer::Backend::CullMode::NONE;
 	deviceCommandContext->SetGraphicsPipelineState(pipelineStateDesc);
 	const CShaderProgramPtr& dummyShader = dummyTech->GetShader();
 
 	dummyShader->Uniform(str_transform, sceneRenderer.GetViewCamera().GetViewProjection());
 	dummyShader->Uniform(str_color, 0.0f, 0.0f, 0.0f, 0.0f);
 	for (CPatchRData* data : m->visiblePatches[cullGroup])
 		data->RenderWaterShore(deviceCommandContext, dummyShader);
 	dummyTech->EndPass();
 
 	deviceCommandContext->SetFramebuffer(
 		deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 }
 
 void TerrainRenderer::RenderPriorities(CCanvas2D& canvas, int cullGroup)
 {
 	PROFILE("priorities");
 
 	ENSURE(m->phase == Phase_Render);
 
 	CTextRenderer textRenderer;
 	textRenderer.SetCurrentFont(CStrIntern("mono-stroke-10"));
 	textRenderer.SetCurrentColor(CColor(1.0f, 1.0f, 0.0f, 1.0f));
 
 	std::vector<CPatchRData*>& visiblePatches = m->visiblePatches[cullGroup];
 	for (size_t i = 0; i < visiblePatches.size(); ++i)
 		visiblePatches[i]->RenderPriorities(textRenderer);
 
 	canvas.DrawText(textRenderer);
 }
Index: ps/trunk/source/renderer/TexturedLineRData.cpp
===================================================================
--- ps/trunk/source/renderer/TexturedLineRData.cpp	(revision 26524)
+++ ps/trunk/source/renderer/TexturedLineRData.cpp	(revision 26525)
@@ -1,456 +1,459 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "TexturedLineRData.h"
 
 #include "graphics/ShaderProgram.h"
 #include "graphics/Terrain.h"
 #include "maths/Frustum.h"
 #include "maths/MathUtil.h"
 #include "maths/Quaternion.h"
 #include "ps/CStrInternStatic.h"
 #include "renderer/OverlayRenderer.h"
 #include "renderer/Renderer.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/system/SimContext.h"
 #include "simulation2/components/ICmpWaterManager.h"
 
 /* Note: this implementation uses g_VBMan directly rather than access it through the nicer VertexArray interface,
  * because it allows you to work with variable amounts of vertices and indices more easily. New code should prefer
  * to use VertexArray where possible, though. */
 
 void CTexturedLineRData::Render(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const SOverlayTexturedLine& line, const CShaderProgramPtr& shader)
 {
 	if (!m_VB || !m_VBIndices)
 		return; // might have failed to allocate
 
 	// -- render main line quad strip ----------------------
 
 	const int streamFlags = shader->GetStreamFlags();
 
 	line.m_TextureBase->UploadBackendTextureIfNeeded(deviceCommandContext);
 	line.m_TextureMask->UploadBackendTextureIfNeeded(deviceCommandContext);
+
+	m_VBIndices->m_Owner->UploadIfNeeded(deviceCommandContext);
+
 	shader->BindTexture(str_baseTex, line.m_TextureBase->GetBackendTexture());
 	shader->BindTexture(str_maskTex, line.m_TextureMask->GetBackendTexture());
 	shader->Uniform(str_objectColor, line.m_Color);
 
 	GLsizei stride = sizeof(CTexturedLineRData::SVertex);
 	CTexturedLineRData::SVertex* vertexBase =
 		reinterpret_cast<CTexturedLineRData::SVertex*>(m_VB->m_Owner->Bind(deviceCommandContext));
 
 	if (streamFlags & STREAM_POS)
 		shader->VertexPointer(3, GL_FLOAT, stride, &vertexBase->m_Position[0]);
 
 	if (streamFlags & STREAM_UV0)
 		shader->TexCoordPointer(GL_TEXTURE0, 2, GL_FLOAT, stride, &vertexBase->m_UVs[0]);
 
 	if (streamFlags & STREAM_UV1)
 		shader->TexCoordPointer(GL_TEXTURE1, 2, GL_FLOAT, stride, &vertexBase->m_UVs[0]);
 
-	u8* indexBase = m_VBIndices->m_Owner->Bind(deviceCommandContext);
-
 	shader->AssertPointersBound();
-	glDrawElements(GL_TRIANGLES, m_VBIndices->m_Count, GL_UNSIGNED_SHORT, indexBase + sizeof(u16)*m_VBIndices->m_Index);
+
+	deviceCommandContext->SetIndexBuffer(m_VBIndices->m_Owner->GetBuffer());
+	deviceCommandContext->DrawIndexed(m_VBIndices->m_Index, m_VBIndices->m_Count, 0);
 
 	g_Renderer.GetStats().m_DrawCalls++;
 	g_Renderer.GetStats().m_OverlayTris += m_VBIndices->m_Count/3;
 }
 
 void CTexturedLineRData::Update(const SOverlayTexturedLine& line)
 {
 	m_VBIndices.Reset();
 	m_VB.Reset();
 
 	if (!line.m_SimContext)
 	{
 		debug_warn(L"[TexturedLineRData] No SimContext set for textured overlay line, cannot render (no terrain data)");
 		return;
 	}
 
 	float v = 0.f;
 	std::vector<SVertex> vertices;
 	std::vector<u16> indices;
 
 	const size_t n = line.m_Coords.size(); // number of line points
 	bool closed = line.m_Closed;
 
 	ENSURE(n >= 2); // minimum needed to avoid errors (also minimum value to make sense, can't draw a line between 1 point)
 
 	// In each iteration, p1 is the position of vertex i, p0 is i-1, p2 is i+1.
 	// To avoid slightly expensive terrain computations we cycle these around and
 	// recompute p2 at the end of each iteration.
 
 	CVector3D p0;
 	CVector3D p1(line.m_Coords[0].X, 0, line.m_Coords[0].Y);
 	CVector3D p2(line.m_Coords[1].X, 0, line.m_Coords[1].Y);
 
 	if (closed)
 		// grab the ending point so as to close the loop
 		p0 = CVector3D(line.m_Coords[n - 1].X, 0, line.m_Coords[n - 1].Y);
 	else
 		// we don't want to loop around and use the direction towards the other end of the line, so create an artificial p0 that
 		// extends the p2 -> p1 direction, and use that point instead
 		p0 = p1 + (p1 - p2);
 
 	bool p1floating = false;
 	bool p2floating = false;
 
 	// Compute terrain heights, clamped to the water height (and remember whether
 	// each point was floating on water, for normal computation later)
 
 	// TODO: if we ever support more than one water level per map, recompute this per point
 	CmpPtr<ICmpWaterManager> cmpWaterManager(*line.m_SimContext, SYSTEM_ENTITY);
 	float w = cmpWaterManager ? cmpWaterManager->GetExactWaterLevel(p0.X, p0.Z) : 0.f;
 
 	const CTerrain& terrain = line.m_SimContext->GetTerrain();
 
 	p0.Y = terrain.GetExactGroundLevel(p0.X, p0.Z);
 	if (p0.Y < w)
 		p0.Y = w;
 
 	p1.Y = terrain.GetExactGroundLevel(p1.X, p1.Z);
 	if (p1.Y < w)
 	{
 		p1.Y = w;
 		p1floating = true;
 	}
 
 	p2.Y = terrain.GetExactGroundLevel(p2.X, p2.Z);
 	if (p2.Y < w)
 	{
 		p2.Y = w;
 		p2floating = true;
 	}
 
 	for (size_t i = 0; i < n; ++i)
 	{
 		// For vertex i, compute bisector of lines (i-1)..(i) and (i)..(i+1)
 		// perpendicular to terrain normal
 
 		// Normal is vertical if on water, else computed from terrain
 		CVector3D norm;
 		if (p1floating)
 			norm = CVector3D(0, 1, 0);
 		else
 			norm = terrain.CalcExactNormal(p1.X, p1.Z);
 
 		CVector3D b = ((p1 - p0).Normalized() + (p2 - p1).Normalized()).Cross(norm);
 
 		// Adjust bisector length to match the line thickness, along the line's width
 		float l = b.Dot((p2 - p1).Normalized().Cross(norm));
 		if (fabs(l) > 0.000001f) // avoid unlikely divide-by-zero
 			b *= line.m_Thickness / l;
 
 		// Push vertices and indices for each quad in GL_TRIANGLES order. The two triangles of each quad are indexed using
 		// the winding orders (BR, BL, TR) and (TR, BL, TL) (where BR is bottom-right of this iteration's quad, TR top-right etc).
 		SVertex vertex1(p1 + b + norm*OverlayRenderer::OVERLAY_VOFFSET, 0.f, v);
 		SVertex vertex2(p1 - b + norm*OverlayRenderer::OVERLAY_VOFFSET, 1.f, v);
 		vertices.push_back(vertex1);
 		vertices.push_back(vertex2);
 
 		u16 vertexCount = static_cast<u16>(vertices.size());
 		u16 index1 = vertexCount - 2; // index of vertex1 in this iteration (TR of this quad)
 		u16 index2 = vertexCount - 1; // index of the vertex2 in this iteration (TL of this quad)
 
 		if (i == 0)
 		{
 			// initial two vertices to continue building triangles from (n must be >= 2 for this to work)
 			indices.push_back(index1);
 			indices.push_back(index2);
 		}
 		else
 		{
 			u16 index1Prev = vertexCount - 4; // index of the vertex1 in the previous iteration (BR of this quad)
 			u16 index2Prev = vertexCount - 3; // index of the vertex2 in the previous iteration (BL of this quad)
 			ENSURE(index1Prev < vertexCount);
 			ENSURE(index2Prev < vertexCount);
 			// Add two corner points from last iteration and join with one of our own corners to create triangle 1
 			// (don't need to do this if i == 1 because i == 0 are the first two ones, they don't need to be copied)
 			if (i > 1)
 			{
 				indices.push_back(index1Prev);
 				indices.push_back(index2Prev);
 			}
 			indices.push_back(index1); // complete triangle 1
 
 			// create triangle 2, specifying the adjacent side's vertices in the opposite order from triangle 1
 			indices.push_back(index1);
 			indices.push_back(index2Prev);
 			indices.push_back(index2);
 		}
 
 		// alternate V coordinate for debugging
 		v = 1 - v;
 
 		// cycle the p's and compute the new p2
 		p0 = p1;
 		p1 = p2;
 		p1floating = p2floating;
 
 		// if in closed mode, wrap around the coordinate array for p2 -- otherwise, extend linearly
 		if (!closed && i == n-2)
 			// next iteration is the last point of the line, so create an artificial p2 that extends the p0 -> p1 direction
 			p2 = p1 + (p1 - p0);
 		else
 			p2 = CVector3D(line.m_Coords[(i + 2) % n].X, 0, line.m_Coords[(i + 2) % n].Y);
 
 		p2.Y = terrain.GetExactGroundLevel(p2.X, p2.Z);
 		if (p2.Y < w)
 		{
 			p2.Y = w;
 			p2floating = true;
 		}
 		else
 			p2floating = false;
 	}
 
 	if (closed)
 	{
 		// close the path
 		if (n % 2 == 0)
 		{
 			u16 vertexCount = static_cast<u16>(vertices.size());
 			indices.push_back(vertexCount - 2);
 			indices.push_back(vertexCount - 1);
 			indices.push_back(0);
 
 			indices.push_back(0);
 			indices.push_back(vertexCount - 1);
 			indices.push_back(1);
 		}
 		else
 		{
 			// add two vertices to have the good UVs for the last quad
 			SVertex vertex1(vertices[0].m_Position, 0.f, 1.f);
 			SVertex vertex2(vertices[1].m_Position, 1.f, 1.f);
 			vertices.push_back(vertex1);
 			vertices.push_back(vertex2);
 
 			u16 vertexCount = static_cast<u16>(vertices.size());
 			indices.push_back(vertexCount - 4);
 			indices.push_back(vertexCount - 3);
 			indices.push_back(vertexCount - 2);
 
 			indices.push_back(vertexCount - 2);
 			indices.push_back(vertexCount - 3);
 			indices.push_back(vertexCount - 1);
 		}
 	}
 	else
 	{
 		// Create start and end caps. On either end, this is done by taking the centroid between the last and second-to-last pair of
 		// vertices that was generated along the path (i.e. the vertex1's and vertex2's from above), taking a directional vector
 		// between them, and drawing the line cap in the plane given by the two butt-end corner points plus said vector.
 		std::vector<u16> capIndices;
 		std::vector<SVertex> capVertices;
 
 		// create end cap
 		CreateLineCap(
 			line,
 			// the order of these vertices is important here, swapping them produces caps at the wrong side
 			vertices[vertices.size()-2].m_Position, // top-right vertex of last quad
 			vertices[vertices.size()-1].m_Position, // top-left vertex of last quad
 			// directional vector between centroids of last vertex pair and second-to-last vertex pair
 			(Centroid(vertices[vertices.size()-2], vertices[vertices.size()-1]) - Centroid(vertices[vertices.size()-4], vertices[vertices.size()-3])).Normalized(),
 			line.m_EndCapType,
 			capVertices,
 			capIndices
 		);
 
 		for (unsigned i = 0; i < capIndices.size(); i++)
 			capIndices[i] += static_cast<u16>(vertices.size());
 
 		vertices.insert(vertices.end(), capVertices.begin(), capVertices.end());
 		indices.insert(indices.end(), capIndices.begin(), capIndices.end());
 
 		capIndices.clear();
 		capVertices.clear();
 
 		// create start cap
 		CreateLineCap(
 			line,
 			// the order of these vertices is important here, swapping them produces caps at the wrong side
 			vertices[1].m_Position,
 			vertices[0].m_Position,
 			// directional vector between centroids of first vertex pair and second vertex pair
 			(Centroid(vertices[1], vertices[0]) - Centroid(vertices[3], vertices[2])).Normalized(),
 			line.m_StartCapType,
 			capVertices,
 			capIndices
 		);
 
 		for (unsigned i = 0; i < capIndices.size(); i++)
 			capIndices[i] += static_cast<u16>(vertices.size());
 
 		vertices.insert(vertices.end(), capVertices.begin(), capVertices.end());
 		indices.insert(indices.end(), capIndices.begin(), capIndices.end());
 	}
 
 	if (vertices.empty() || indices.empty())
 		return;
 
 	// Indices for triangles, so must be multiple of 3.
 	ENSURE(indices.size() % 3 == 0);
 
 	m_BoundingBox = CBoundingBoxAligned();
 	for (const SVertex& vertex : vertices)
 		m_BoundingBox += vertex.m_Position;
 
 	m_VB = g_VBMan.AllocateChunk(
 		sizeof(SVertex), vertices.size(), Renderer::Backend::GL::CBuffer::Type::VERTEX, false);
 	if (m_VB) // allocation might fail (e.g. due to too many vertices)
 	{
 		m_VB->m_Owner->UpdateChunkVertices(m_VB.Get(), &vertices[0]); // copy data into VBO
 
 		for (size_t k = 0; k < indices.size(); ++k)
 			indices[k] += static_cast<u16>(m_VB->m_Index);
 
 		m_VBIndices = g_VBMan.AllocateChunk(
 			sizeof(u16), indices.size(), Renderer::Backend::GL::CBuffer::Type::INDEX, false);
 		if (m_VBIndices)
 			m_VBIndices->m_Owner->UpdateChunkVertices(m_VBIndices.Get(), &indices[0]);
 	}
 
 }
 
 void CTexturedLineRData::CreateLineCap(const SOverlayTexturedLine& line, const CVector3D& corner1, const CVector3D& corner2,
 	const CVector3D& lineDirectionNormal, SOverlayTexturedLine::LineCapType endCapType, std::vector<SVertex>& verticesOut,
 	std::vector<u16>& indicesOut)
 {
 	if (endCapType == SOverlayTexturedLine::LINECAP_FLAT)
 		return; // no action needed, this is the default
 
 	// When not in closed mode, we've created artificial points for the start- and endpoints that extend the line in the
 	// direction of the first and the last segment, respectively. Thus, we know both the start and endpoints have perpendicular
 	// butt endings, i.e. the end corner vertices on either side of the line extend perpendicularly from the segment direction.
 	// That is to say, when viewed from the top, we will have something like
 	//                                                 .
 	//  this:                     and not like this:  /|
 	//         ----+                                 / |
 	//             |                                /  .
 	//             |                                  /
 	//         ----+                                 /
 	//
 
 	int roundCapPoints = 8; // amount of points to sample along the semicircle for rounded caps (including corner points)
 	float radius = line.m_Thickness;
 
 	CVector3D centerPoint = (corner1 + corner2) * 0.5f;
 	SVertex centerVertex(centerPoint, 0.5f, 0.5f);
 	u16 indexOffset = static_cast<u16>(verticesOut.size()); // index offset in verticesOut from where we start adding our vertices
 
 	switch (endCapType)
 	{
 	case SOverlayTexturedLine::LINECAP_SHARP:
 		{
 			roundCapPoints = 3; // creates only one point directly ahead
 			radius *= 1.5f; // make it a bit sharper (note that we don't use the radius for the butt-end corner points so it should be ok)
 			centerVertex.m_UVs[0] = 0.480f; // slight visual correction to make the texture match up better at the corner points
 		}
 		FALLTHROUGH;
 	case SOverlayTexturedLine::LINECAP_ROUND:
 		{
 			// Draw a rounded line cap in the 3D plane of the line specified by the two corner points and the normal vector of the
 			// line's direction. The terrain normal at the centroid between the two corner points is perpendicular to this plane.
 			// The way this works is by taking a vector from the corner points' centroid to one of the corner points (which is then
 			// of radius length), and rotate it around the terrain normal vector in that centroid. This will rotate the vector in
 			// the line's plane, producing the desired rounded cap.
 
 			// To please OpenGL's winding order, this angle needs to be negated depending on whether we start rotating from
 			// the (center -> corner1) or (center -> corner2) vector. For the (center -> corner2) vector, we apparently need to use
 			// the negated angle.
 			float stepAngle = -(float)(M_PI/(roundCapPoints-1));
 
 			// Push the vertices in triangle fan order (easy to generate GL_TRIANGLES indices for afterwards)
 			// Note that we're manually adding the corner vertices instead of having them be generated by the rotating vector.
 			// This is because we want to support an overly large radius to make the sharp line ending look sharper.
 			verticesOut.push_back(centerVertex);
 			verticesOut.push_back(SVertex(corner2, 0.f, 0.f));
 
 			// Get the base vector that we will incrementally rotate in the cap plane to produce the radial sample points.
 			// Normally corner2 - centerPoint would suffice for this since it is of radius length, but we want to support custom
 			// radii to support tuning the 'sharpness' of sharp end caps (see above)
 			CVector3D rotationBaseVector = (corner2 - centerPoint).Normalized() * radius;
 			// Calculate the normal vector of the plane in which we're going to be drawing the line cap. This is the vector that
 			// is perpendicular to both baseVector and the 'lineDirectionNormal' vector indicating the direction of the line.
 			// Note that we shouldn't use terrain->CalcExactNormal() here because if the line is being rendered on top of water,
 			// then CalcExactNormal will return the normal vector of the terrain that's underwater (which can be quite funky).
 			CVector3D capPlaneNormal = lineDirectionNormal.Cross(rotationBaseVector).Normalized();
 
 			for (int i = 1; i < roundCapPoints - 1; ++i)
 			{
 				// Rotate the centerPoint -> corner vector by i*stepAngle radians around the cap plane normal at the center point.
 				CQuaternion quatRotation;
 				quatRotation.FromAxisAngle(capPlaneNormal, i * stepAngle);
 				CVector3D worldPos3D = centerPoint + quatRotation.Rotate(rotationBaseVector);
 
 				// Let v range from 0 to 1 as we move along the semi-circle, keep u fixed at 0 (i.e. curve the left vertical edge
 				// of the texture around the edge of the semicircle)
 				float u = 0.f;
 				float v = Clamp((i / static_cast<float>(roundCapPoints - 1)), 0.f, 1.f); // pos, u, v
 				verticesOut.push_back(SVertex(worldPos3D, u, v));
 			}
 
 			// connect back to the other butt-end corner point to complete the semicircle
 			verticesOut.push_back(SVertex(corner1, 0.f, 1.f));
 
 			// now push indices in GL_TRIANGLES order; vertices[indexOffset] is the center vertex, vertices[indexOffset + 1] is the
 			// first corner point, then a bunch of radial samples, and then at the end we have the other corner point again. So:
 			for (int i=1; i < roundCapPoints; ++i)
 			{
 				indicesOut.push_back(indexOffset); // center vertex
 				indicesOut.push_back(indexOffset + i);
 				indicesOut.push_back(indexOffset + i + 1);
 			}
 		}
 		break;
 
 	case SOverlayTexturedLine::LINECAP_SQUARE:
 		{
 			// Extend the (corner1 -> corner2) vector along the direction normal and draw a square line ending consisting of
 			// three triangles (sort of like a triangle fan)
 			// NOTE: The order in which the vertices are pushed out determines the visibility, as they
 			// are rendered only one-sided; the wrong order of vertices will make the cap visible only from the bottom.
 			verticesOut.push_back(centerVertex);
 			verticesOut.push_back(SVertex(corner2, 0.f, 0.f));
 			verticesOut.push_back(SVertex(corner2 + (lineDirectionNormal * (line.m_Thickness)), 0.f, 0.33333f)); // extend butt corner point 2 along the normal vector
 			verticesOut.push_back(SVertex(corner1 + (lineDirectionNormal * (line.m_Thickness)), 0.f, 0.66666f)); // extend butt corner point 1 along the normal vector
 			verticesOut.push_back(SVertex(corner1, 0.f, 1.0f)); // push butt corner point 1
 
 			for (int i=1; i < 4; ++i)
 			{
 				indicesOut.push_back(indexOffset); // center point
 				indicesOut.push_back(indexOffset + i);
 				indicesOut.push_back(indexOffset + i + 1);
 			}
 		}
 		break;
 
 	default:
 		break;
 	}
 
 }
 
 bool CTexturedLineRData::IsVisibleInFrustum(const CFrustum& frustum) const
 {
 	return frustum.IsBoxVisible(m_BoundingBox);
 }
Index: ps/trunk/source/renderer/VertexArray.cpp
===================================================================
--- ps/trunk/source/renderer/VertexArray.cpp	(revision 26524)
+++ ps/trunk/source/renderer/VertexArray.cpp	(revision 26525)
@@ -1,323 +1,330 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "lib/alignment.h"
 #include "lib/ogl.h"
 #include "lib/sysdep/rtl.h"
 #include "maths/Vector3D.h"
 #include "maths/Vector4D.h"
 #include "ps/CLogger.h"
 #include "graphics/Color.h"
 #include "graphics/SColor.h"
 #include "renderer/VertexArray.h"
 #include "renderer/VertexBuffer.h"
 #include "renderer/VertexBufferManager.h"
 
 
 VertexArray::VertexArray(
 	const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic)
 	: m_Type(type), m_Dynamic(dynamic)
 {
 	m_NumberOfVertices = 0;
 
 	m_BackingStore = 0;
 	m_Stride = 0;
 }
 
 VertexArray::~VertexArray()
 {
 	Free();
 }
 
 // Free all resources on destruction or when a layout parameter changes
 void VertexArray::Free()
 {
 	rtl_FreeAligned(m_BackingStore);
 	m_BackingStore = 0;
 
 	m_VB.Reset();
 }
 
 // Set the number of vertices stored in the array
 void VertexArray::SetNumberOfVertices(const size_t numberOfVertices)
 {
 	if (numberOfVertices == m_NumberOfVertices)
 		return;
 
 	Free();
 	m_NumberOfVertices = numberOfVertices;
 }
 
 // Add vertex attributes like Position, Normal, UV
 void VertexArray::AddAttribute(Attribute* attr)
 {
 	ENSURE(
 		(attr->type == GL_FLOAT || attr->type == GL_SHORT || attr->type == GL_UNSIGNED_SHORT || attr->type == GL_UNSIGNED_BYTE)
 		&& "Unsupported attribute type"
 	);
 	ENSURE(attr->elems >= 1 && attr->elems <= 4);
 
 	attr->vertexArray = this;
 	m_Attributes.push_back(attr);
 
 	Free();
 }
 
 // Template specialization for GetIterator().
 // We can put this into the source file because only a fixed set of types
 // is supported for type safety.
 template<>
 VertexArrayIterator<CVector3D> VertexArray::Attribute::GetIterator<CVector3D>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_FLOAT);
 	ENSURE(elems >= 3);
 
 	return vertexArray->MakeIterator<CVector3D>(this);
 }
 
 template<>
 VertexArrayIterator<CVector4D> VertexArray::Attribute::GetIterator<CVector4D>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_FLOAT);
 	ENSURE(elems >= 4);
 
 	return vertexArray->MakeIterator<CVector4D>(this);
 }
 
 template<>
 VertexArrayIterator<float[2]> VertexArray::Attribute::GetIterator<float[2]>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_FLOAT);
 	ENSURE(elems >= 2);
 
 	return vertexArray->MakeIterator<float[2]>(this);
 }
 
 template<>
 VertexArrayIterator<SColor3ub> VertexArray::Attribute::GetIterator<SColor3ub>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_UNSIGNED_BYTE);
 	ENSURE(elems >= 3);
 
 	return vertexArray->MakeIterator<SColor3ub>(this);
 }
 
 template<>
 VertexArrayIterator<SColor4ub> VertexArray::Attribute::GetIterator<SColor4ub>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_UNSIGNED_BYTE);
 	ENSURE(elems >= 4);
 
 	return vertexArray->MakeIterator<SColor4ub>(this);
 }
 
 template<>
 VertexArrayIterator<u16> VertexArray::Attribute::GetIterator<u16>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_UNSIGNED_SHORT);
 	ENSURE(elems >= 1);
 
 	return vertexArray->MakeIterator<u16>(this);
 }
 
 template<>
 VertexArrayIterator<u16[2]> VertexArray::Attribute::GetIterator<u16[2]>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_UNSIGNED_SHORT);
 	ENSURE(elems >= 2);
 
 	return vertexArray->MakeIterator<u16[2]>(this);
 }
 
 template<>
 VertexArrayIterator<u8> VertexArray::Attribute::GetIterator<u8>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_UNSIGNED_BYTE);
 	ENSURE(elems >= 1);
 
 	return vertexArray->MakeIterator<u8>(this);
 }
 
 template<>
 VertexArrayIterator<u8[4]> VertexArray::Attribute::GetIterator<u8[4]>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_UNSIGNED_BYTE);
 	ENSURE(elems >= 4);
 
 	return vertexArray->MakeIterator<u8[4]>(this);
 }
 
 template<>
 VertexArrayIterator<short> VertexArray::Attribute::GetIterator<short>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_SHORT);
 	ENSURE(elems >= 1);
 
 	return vertexArray->MakeIterator<short>(this);
 }
 
 template<>
 VertexArrayIterator<short[2]> VertexArray::Attribute::GetIterator<short[2]>() const
 {
 	ENSURE(vertexArray);
 	ENSURE(type == GL_SHORT);
 	ENSURE(elems >= 2);
 
 	return vertexArray->MakeIterator<short[2]>(this);
 }
 
 static size_t RoundStride(size_t stride)
 {
 	if (stride <= 0)
 		return 0;
 	if (stride <= 4)
 		return 4;
 	if (stride <= 8)
 		return 8;
 	if (stride <= 16)
 		return 16;
 
 	return Align<32>(stride);
 }
 
 // Re-layout by assigning offsets on a first-come first-serve basis,
 // then round up to a reasonable stride.
 // Backing store is also created here, VBOs are created on upload.
 void VertexArray::Layout()
 {
 	Free();
 
 	m_Stride = 0;
 
 	for (ssize_t idx = m_Attributes.size()-1; idx >= 0; --idx)
 	{
 		Attribute* attr = m_Attributes[idx];
 
 		if (!attr->type || !attr->elems)
 			continue;
 
 		size_t attrSize = 0;
 		switch(attr->type)
 		{
 		case GL_UNSIGNED_BYTE:
 			attrSize = sizeof(GLubyte);
 			break;
 		case GL_SHORT:
 			attrSize = sizeof(GLshort);
 			break;
 		case GL_UNSIGNED_SHORT:
 			attrSize = sizeof(GLushort);
 			break;
 		case GL_FLOAT:
 			attrSize = sizeof(GLfloat);
 			break;
 		default:
 			attrSize = 0;
 			debug_warn(L"Bad Attribute::type"); break;
 		}
 
 		attrSize *= attr->elems;
 
 		attr->offset = m_Stride;
 
 		m_Stride += attrSize;
 
 		if (m_Type == Renderer::Backend::GL::CBuffer::Type::VERTEX)
 			m_Stride = Align<4>(m_Stride);
 	}
 
 	if (m_Type == Renderer::Backend::GL::CBuffer::Type::VERTEX)
 		m_Stride = RoundStride(m_Stride);
 
 	if (m_Stride)
 		m_BackingStore = (char*)rtl_AllocateAligned(m_Stride * m_NumberOfVertices, 16);
 }
 
 void VertexArray::PrepareForRendering()
 {
 	m_VB->m_Owner->PrepareForRendering(m_VB.Get());
 }
 
 // (Re-)Upload the attributes.
 // Create the VBO if necessary.
 void VertexArray::Upload()
 {
 	ENSURE(m_BackingStore);
 
 	if (!m_VB)
 	{
 		m_VB = g_VBMan.AllocateChunk(
 			m_Stride, m_NumberOfVertices, m_Type, m_Dynamic, m_BackingStore);
 	}
 
 	if (!m_VB)
 	{
 		LOGERROR("Failed to allocate VBO for vertex array");
 		return;
 	}
 
 	m_VB->m_Owner->UpdateChunkVertices(m_VB.Get(), m_BackingStore);
 }
 
 
 // Bind this array, returns the base address for calls to glVertexPointer etc.
 u8* VertexArray::Bind(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (!m_VB)
-		return NULL;
+		return nullptr;
 
-	u8* base = m_VB->m_Owner->Bind(deviceCommandContext);
-	base += m_VB->m_Index*m_Stride;
+	UploadIfNeeded(deviceCommandContext);
+	m_VB->m_Owner->Bind(deviceCommandContext);
+	u8* base = nullptr;
+	base += m_VB->m_Index * m_Stride;
 	return base;
 }
 
+void VertexArray::UploadIfNeeded(
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
+{
+	m_VB->m_Owner->UploadIfNeeded(deviceCommandContext);
+}
 
 // Free the backing store to save some memory
 void VertexArray::FreeBackingStore()
 {
 	// In streaming modes, the backing store must be retained
 	ENSURE(!CVertexBuffer::UseStreaming(m_Dynamic));
 
 	rtl_FreeAligned(m_BackingStore);
 	m_BackingStore = 0;
 }
 
 VertexIndexArray::VertexIndexArray(const bool dynamic) :
 	VertexArray(Renderer::Backend::GL::CBuffer::Type::INDEX, dynamic)
 {
 	m_Attr.type = GL_UNSIGNED_SHORT;
 	m_Attr.elems = 1;
 	AddAttribute(&m_Attr);
 }
 
 VertexArrayIterator<u16> VertexIndexArray::GetIterator() const
 {
 	return m_Attr.GetIterator<u16>();
 }
Index: ps/trunk/source/renderer/VertexArray.h
===================================================================
--- ps/trunk/source/renderer/VertexArray.h	(revision 26524)
+++ ps/trunk/source/renderer/VertexArray.h	(revision 26525)
@@ -1,231 +1,234 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef INCLUDED_VERTEXARRAY
 #define INCLUDED_VERTEXARRAY
 
 #include "renderer/backend/gl/Buffer.h"
 #include "renderer/backend/gl/DeviceCommandContext.h"
 #include "renderer/VertexBufferManager.h"
 
 #include <vector>
 
 // Iterator
 template<typename T>
 class VertexArrayIterator
 {
 public:
 	typedef T Type;
 
 public:
 	VertexArrayIterator() :
 		m_Data(0), m_Stride(0)
 	{
 	}
 
 	VertexArrayIterator(char* data, size_t stride) :
 		m_Data(data), m_Stride(stride)
 	{
 	}
 
 	VertexArrayIterator(const VertexArrayIterator& rhs) :
 		m_Data(rhs.m_Data), m_Stride(rhs.m_Stride)
 	{
 	}
 
 	VertexArrayIterator& operator=(const VertexArrayIterator& rhs)
 	{
 		m_Data = rhs.m_Data;
 		m_Stride = rhs.m_Stride;
 		return *this;
 	}
 
 	// Accessors
 	T& operator*() const { return *(T*)m_Data; }
 	T* operator->() const { return (T*)m_Data; }
 	T& operator[](size_t idx) const { return *(T*)(m_Data + idx*m_Stride); }
 
 	// Walking
 	VertexArrayIterator& operator++()
 	{
 		m_Data += m_Stride;
 		return *this;
 	}
 	VertexArrayIterator operator++(int)
 	{
 		VertexArrayIterator tmp = *this;
 		m_Data += m_Stride;
 		return tmp;
 	}
 	VertexArrayIterator& operator--()
 	{
 		m_Data -= m_Stride;
 		return *this;
 	}
 	VertexArrayIterator operator--(int)
 	{
 		VertexArrayIterator tmp = *this;
 		m_Data -= m_Stride;
 		return tmp;
 	}
 
 	VertexArrayIterator& operator+=(ssize_t rhs)
 	{
 		m_Data += rhs*m_Stride;
 		return *this;
 	}
 	VertexArrayIterator& operator-=(ssize_t rhs)
 	{
 		m_Data -= rhs*m_Stride;
 		return *this;
 	}
 
 	VertexArrayIterator operator+(ssize_t rhs) const
 	{
 		VertexArrayIterator tmp = *this;
 		tmp.m_Data += rhs*m_Stride;
 		return tmp;
 	}
 	VertexArrayIterator operator-(ssize_t rhs) const
 	{
 		VertexArrayIterator tmp = *this;
 		tmp.m_Data -= rhs*m_Stride;
 		return tmp;
 	}
 
 	// Accessors for raw buffer data, for performance-critical code
 	char* GetData() const
 	{
 		return m_Data;
 	}
 	size_t GetStride() const
 	{
 		return m_Stride;
 	}
 
 private:
 	char* m_Data;
 	size_t m_Stride;
 };
 
 
 // Manage a vertex array with a runtime-determined set of attributes.
 //
 // Purpose: Different rendering paths sometimes require different sets of
 // attributes (e.g. normal vector vs. color data), which is difficult to
 // support with hardcoded vertex structures.
 // This class chooses the vertex layout at runtime, based on the attributes
 // that are actually needed.
 //
 // Note that this class will not allocate any OpenGL resources until one
 // of the Upload functions is called.
 class VertexArray
 {
 public:
 	struct Attribute
 	{
 		// Data type. Currently supported: GL_FLOAT, GL_SHORT, GL_UNSIGNED_SHORT, GL_UNSIGNED_BYTE.
 		GLenum type;
 		// How many elements per vertex (e.g. 3 for RGB, 2 for UV)
 		GLuint elems;
 
 		// Offset (in bytes) into a vertex structure (filled in by Layout())
 		size_t offset;
 
 		VertexArray* vertexArray;
 
 		Attribute() : type(0), elems(0), offset(0), vertexArray(0) { }
 
 		// Get an iterator over the backing store for the given attribute that
 		// initially points at the first vertex.
 		// Supported types T: CVector3D, CVector4D, float[2], SColor3ub, SColor4ub,
 		// u16, u16[2], u8, u8[4], short, short[2].
 		// This function verifies at runtime that the requested type T matches
 		// the attribute definition passed to AddAttribute().
 		template<typename T>
 		VertexArrayIterator<T> GetIterator() const;
 	};
 
 public:
 	VertexArray(
 		const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic);
 	~VertexArray();
 
 	// Set the number of vertices stored in the array
 	void SetNumberOfVertices(const size_t numberOfVertices);
 	// Add vertex attributes
 	void AddAttribute(Attribute* attr);
 
 	size_t GetNumberOfVertices() const { return m_NumberOfVertices; }
 	size_t GetStride() const { return m_Stride; }
 
 	// Layout the vertex array format and create backing buffer in RAM.
 	// You must call Layout() after changing the number of vertices or
 	// attributes.
 	// All vertex data is lost when a vertex array is re-layouted.
 	void Layout();
 	// (Re-)Upload the attributes of the vertex array from the backing store to
-	// the underlying VBO object.
+	// the underlying buffer.
 	void Upload();
 	// Make this vertex array's data available for the next series of calls to Bind
 	void PrepareForRendering();
 	// Bind this array, returns the base address for calls to glVertexPointer etc.
 	u8* Bind(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext);
+	// Upload data marked by PrepareForRendering() that is out of date (no-op for static arrays).
+	void UploadIfNeeded(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext);
 
 	// If you know for certain that you'll never have to change the data again,
 	// call this to free some memory.
 	void FreeBackingStore();
 
+	// Underlying buffer and the index of this array's first element in it (nullptr / 0 if not allocated).
+	Renderer::Backend::GL::CBuffer* GetBuffer() { return m_VB ? m_VB->m_Owner->GetBuffer() : nullptr; }
+	uint32_t GetOffset() const { return m_VB ? m_VB->m_Index : 0; }
 private:
 	void Free();
 
 	template<typename T>
 	VertexArrayIterator<T> MakeIterator(const Attribute* attr)
 	{
 		ENSURE(attr->type && attr->elems);
 		return VertexArrayIterator<T>(m_BackingStore + attr->offset, m_Stride);
 	}
 
 	Renderer::Backend::GL::CBuffer::Type m_Type;
 	bool m_Dynamic;
 	size_t m_NumberOfVertices;
 	std::vector<Attribute*> m_Attributes;
 
 	CVertexBufferManager::Handle m_VB;
 	size_t m_Stride;
 	char* m_BackingStore; // 16-byte aligned, to allow fast SSE access
 };
 
 /**
  * A VertexArray that is specialised to handle 16-bit array indices.
- * Call Bind() and pass the return value to the indices parameter of
- * glDrawElements/glDrawRangeElements/glMultiDrawElements.
- * Use CVertexBuffer::Unbind() to unbind the array when done.
+ * Call UploadIfNeeded() before use in Draw/DrawIndexed.
  */
 class VertexIndexArray : public VertexArray
 {
 public:
 	VertexIndexArray(const bool dynamic);
 
 	/// Gets the iterator over the (only) attribute in this array, i.e. a u16.
 	VertexArrayIterator<u16> GetIterator() const;
 
 private:
 	Attribute m_Attr;
 };
 
 #endif // INCLUDED_VERTEXARRAY
Index: ps/trunk/source/renderer/VertexBuffer.cpp
===================================================================
--- ps/trunk/source/renderer/VertexBuffer.cpp	(revision 26524)
+++ ps/trunk/source/renderer/VertexBuffer.cpp	(revision 26525)
@@ -1,339 +1,344 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "VertexBuffer.h"
 
 #include "lib/ogl.h"
 #include "lib/sysdep/cpu.h"
 #include "ps/CLogger.h"
 #include "ps/Errors.h"
 #include "ps/VideoMode.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 
 #include <algorithm>
 #include <cstring>
 #include <iterator>
 
 // Absolute maximum (bytewise) size of each GL vertex buffer object.
 // Make it large enough for the maximum feasible mesh size (64K vertexes,
 // 64 bytes per vertex in InstancingModelRenderer).
 // TODO: measure what influence this has on performance
 constexpr std::size_t MAX_VB_SIZE_BYTES = 4 * 1024 * 1024;
 
 CVertexBuffer::CVertexBuffer(
 	const char* name, const size_t vertexSize,
 	const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic)
 	: CVertexBuffer(name, vertexSize, type, dynamic, MAX_VB_SIZE_BYTES)
 {
 }
 
 CVertexBuffer::CVertexBuffer(
 	const char* name, const size_t vertexSize,
 	const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic,
 	const size_t maximumBufferSize)
 	: m_VertexSize(vertexSize), m_HasNeededChunks(false)
 {
 	size_t size = maximumBufferSize;
 
 	if (type == Renderer::Backend::GL::CBuffer::Type::VERTEX)
 	{
 		// We want to store 16-bit indices to any vertex in a buffer, so the
 		// buffer must never be bigger than vertexSize*64K bytes since we can
 		// address at most 64K of them with 16-bit indices
 		size = std::min(size, vertexSize * 65536);
 	}
+	else if (type == Renderer::Backend::GL::CBuffer::Type::INDEX)
+	{
+		ENSURE(vertexSize == sizeof(u16)); // index data is always 16-bit (see VertexIndexArray)
+	}
 
 	// store max/free vertex counts
 	m_MaxVertices = m_FreeVertices = size / vertexSize;
 
 	m_Buffer = g_VideoMode.GetBackendDevice()->CreateBuffer(
 		name, type, m_MaxVertices * m_VertexSize, dynamic);
 
 	// create sole free chunk
 	VBChunk* chunk = new VBChunk;
 	chunk->m_Owner = this;
 	chunk->m_Count = m_FreeVertices;
 	chunk->m_Index = 0;
 	m_FreeList.emplace_back(chunk);
 }
 
 CVertexBuffer::~CVertexBuffer()
 {
 	// Must have released all chunks before destroying the buffer
 	ENSURE(m_AllocList.empty());
 
 	m_Buffer.reset();
 
 	for (VBChunk* const& chunk : m_FreeList)
 		delete chunk;
 }
 
 bool CVertexBuffer::CompatibleVertexType(
 	const size_t vertexSize, const Renderer::Backend::GL::CBuffer::Type type,
 	const bool dynamic) const
 {
 	ENSURE(m_Buffer);
 	return type == m_Buffer->GetType() && dynamic == m_Buffer->IsDynamic() && vertexSize == m_VertexSize;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Allocate: try to allocate a buffer of given number of vertices (each of
 // given size), with the given type, and using the given texture - return null
 // if no free chunks available
 CVertexBuffer::VBChunk* CVertexBuffer::Allocate(
 	const size_t vertexSize, const size_t numberOfVertices,
 	const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic,
 	void* backingStore)
 {
 	// check this is the right kind of buffer
 	if (!CompatibleVertexType(vertexSize, type, dynamic))
 		return nullptr;
 
 	if (UseStreaming(dynamic))
 		ENSURE(backingStore != nullptr);
 
 	// quick check there's enough vertices spare to allocate
 	if (numberOfVertices > m_FreeVertices)
 		return nullptr;
 
 	// trawl free list looking for first free chunk with enough space
 	std::vector<VBChunk*>::iterator best_iter = m_FreeList.end();
 	for (std::vector<VBChunk*>::iterator iter = m_FreeList.begin(); iter != m_FreeList.end(); ++iter)
 	{
 		if (numberOfVertices == (*iter)->m_Count)
 		{
 			best_iter = iter;
 			break;
 		}
 		else if (numberOfVertices < (*iter)->m_Count && (best_iter == m_FreeList.end() || (*best_iter)->m_Count < (*iter)->m_Count))
 			best_iter = iter;
 	}
 
 	// We could not find a large enough chunk.
 	if (best_iter == m_FreeList.end())
 		return nullptr;
 
 	VBChunk* chunk = *best_iter;
 	m_FreeList.erase(best_iter);
 	m_FreeVertices -= chunk->m_Count;
 
 	chunk->m_BackingStore = backingStore;
 	chunk->m_Dirty = false;
 	chunk->m_Needed = false;
 
 	// split chunk into two; - allocate a new chunk using all unused vertices in the
 	// found chunk, and add it to the free list
 	if (chunk->m_Count > numberOfVertices)
 	{
 		VBChunk* newchunk = new VBChunk;
 		newchunk->m_Owner = this;
 		newchunk->m_Count = chunk->m_Count - numberOfVertices;
 		newchunk->m_Index = chunk->m_Index + numberOfVertices;
 		m_FreeList.emplace_back(newchunk);
 		m_FreeVertices += newchunk->m_Count;
 
 		// resize given chunk
 		chunk->m_Count = numberOfVertices;
 	}
 
 	// return found chunk
 	m_AllocList.push_back(chunk);
 	return chunk;
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Release: return given chunk to this buffer
 void CVertexBuffer::Release(VBChunk* chunk)
 {
 	// Update total free count before potentially modifying this chunk's count
 	m_FreeVertices += chunk->m_Count;
 
 	m_AllocList.erase(std::find(m_AllocList.begin(), m_AllocList.end(), chunk));
 
 	// Sorting O(nlogn) shouldn't be too far from O(n) by performance, because
 	// the container is partly sorted already.
 	std::sort(
 		m_FreeList.begin(), m_FreeList.end(),
 		[](const VBChunk* chunk1, const VBChunk* chunk2) -> bool
 		{
 			return chunk1->m_Index < chunk2->m_Index;
 		});
 
 	// Coalesce with any free-list items that are adjacent to this chunk;
 	// merge the found chunk with the new one, and remove the old one
 	// from the list.
 	for (std::vector<VBChunk*>::iterator iter = m_FreeList.begin(); iter != m_FreeList.end();)
 	{
 		if ((*iter)->m_Index == chunk->m_Index + chunk->m_Count
 		 || (*iter)->m_Index + (*iter)->m_Count == chunk->m_Index)
 		{
 			chunk->m_Index = std::min(chunk->m_Index, (*iter)->m_Index);
 			chunk->m_Count += (*iter)->m_Count;
 			delete *iter;
 			iter = m_FreeList.erase(iter);
 			if (!m_FreeList.empty() && iter != m_FreeList.begin())
 				iter = std::prev(iter);
 		}
 		else
 		{
 			++iter;
 		}
 	}
 
 	m_FreeList.emplace_back(chunk);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // UpdateChunkVertices: update vertex data for given chunk
 void CVertexBuffer::UpdateChunkVertices(VBChunk* chunk, void* data)
 {
 	ENSURE(m_Buffer);
 	if (UseStreaming(m_Buffer->IsDynamic()))
 	{
 		// The VBO is now out of sync with the backing store
 		chunk->m_Dirty = true;
 
 		// Sanity check: Make sure the caller hasn't tried to reallocate
 		// their backing store
 		ENSURE(data == chunk->m_BackingStore);
 	}
 	else
 	{
 		ENSURE(data);
 		g_Renderer.GetDeviceCommandContext()->UploadBufferRegion(
 			m_Buffer.get(), data, chunk->m_Index * m_VertexSize, chunk->m_Count * m_VertexSize);
 	}
 }
 
-///////////////////////////////////////////////////////////////////////////////
-// Bind: bind to this buffer; return pointer to address required as parameter
-// to glVertexPointer ( + etc) calls
-u8* CVertexBuffer::Bind(
+void CVertexBuffer::UploadIfNeeded(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	if (UseStreaming(m_Buffer->IsDynamic()))
 	{
 		if (!m_HasNeededChunks)
-		{
-			deviceCommandContext->BindBuffer(m_Buffer->GetType(), m_Buffer.get());
-			return nullptr;
-		}
+			return; // no chunk was marked by PrepareForRendering() since the last call
 
 		// If any chunks are out of sync with the current VBO, and are
 		// needed for rendering this frame, we'll need to re-upload the VBO
 		bool needUpload = false;
 		for (VBChunk* const& chunk : m_AllocList)
 		{
 			if (chunk->m_Dirty && chunk->m_Needed)
 			{
 				needUpload = true;
 				break;
 			}
 		}
 
 		if (needUpload)
 		{
 			deviceCommandContext->UploadBuffer(m_Buffer.get(), [&](u8* mappedData)
 			{
 #ifndef NDEBUG
 				// To help detect bugs where PrepareForRendering() was not called,
 				// force all not-needed data to 0, so things won't get rendered
 				// with undefined (but possibly still correct-looking) data.
 				memset(mappedData, 0, m_MaxVertices * m_VertexSize);
 #endif
 
 				// Copy only the chunks we need. (This condition is helpful when
 				// the VBO contains data for every unit in the world, but only a
 				// handful are visible on screen and we don't need to bother copying
 				// the rest.)
 				for (VBChunk* const& chunk : m_AllocList)
 					if (chunk->m_Needed)
 						std::memcpy(mappedData + chunk->m_Index * m_VertexSize, chunk->m_BackingStore, chunk->m_Count * m_VertexSize);
 			});
 
 			// Anything we just uploaded is clean; anything else is dirty
 			// since the rest of the VBO content is now undefined
 			for (VBChunk* const& chunk : m_AllocList)
 			{
 				if (chunk->m_Needed)
 				{
 					chunk->m_Dirty = false;
 					chunk->m_Needed = false;
 				}
 				else
 					chunk->m_Dirty = true;
 			}
 		}
 		else
 		{
 			// Reset the flags for the next phase.
 			for (VBChunk* const& chunk : m_AllocList)
 				chunk->m_Needed = false;
 		}
 
 		m_HasNeededChunks = false;
 	}
-	deviceCommandContext->BindBuffer(m_Buffer->GetType(), m_Buffer.get());
+}
 
+// Bind: upload pending chunk data if needed, then bind this buffer. Always
+// returns nullptr; callers add their chunk's byte offset (see VertexArray::Bind).
+u8* CVertexBuffer::Bind(
+	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
+{
+	UploadIfNeeded(deviceCommandContext);
+	deviceCommandContext->BindBuffer(m_Buffer->GetType(), m_Buffer.get());
 	return nullptr;
 }
 
 void CVertexBuffer::Unbind(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext)
 {
 	deviceCommandContext->BindBuffer(
 		Renderer::Backend::GL::CBuffer::Type::VERTEX, nullptr);
 	deviceCommandContext->BindBuffer(
 		Renderer::Backend::GL::CBuffer::Type::INDEX, nullptr);
 }
 
 size_t CVertexBuffer::GetBytesReserved() const
 {
 	return MAX_VB_SIZE_BYTES;
 }
 
 size_t CVertexBuffer::GetBytesAllocated() const
 {
 	return (m_MaxVertices - m_FreeVertices) * m_VertexSize;
 }
 
 void CVertexBuffer::DumpStatus() const
 {
 	debug_printf("freeverts = %d\n", static_cast<int>(m_FreeVertices));
 
 	size_t maxSize = 0;
 	for (VBChunk* const& chunk : m_FreeList)
 	{
 		debug_printf("free chunk %p: size=%d\n", static_cast<void *>(chunk), static_cast<int>(chunk->m_Count));
 		maxSize = std::max(chunk->m_Count, maxSize);
 	}
 	debug_printf("max size = %d\n", static_cast<int>(maxSize));
 }
 
 bool CVertexBuffer::UseStreaming(const bool dynamic)
 {
 	return dynamic;
 }
 
 void CVertexBuffer::PrepareForRendering(VBChunk* chunk)
 {
 	chunk->m_Needed = true;
 	m_HasNeededChunks = true;
 }
Index: ps/trunk/source/renderer/VertexBuffer.h
===================================================================
--- ps/trunk/source/renderer/VertexBuffer.h	(revision 26524)
+++ ps/trunk/source/renderer/VertexBuffer.h	(revision 26525)
@@ -1,166 +1,170 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
  * encapsulation of VBOs with batching and sharing
  */
 
 #ifndef INCLUDED_VERTEXBUFFER
 #define INCLUDED_VERTEXBUFFER
 
 #include "renderer/backend/gl/Buffer.h"
 #include "renderer/backend/gl/DeviceCommandContext.h"
 
 #include <memory>
 #include <vector>
 
 /**
  * CVertexBuffer: encapsulation of ARB_vertex_buffer_object, also supplying
  * some additional functionality for sharing buffers between multiple objects.
  *
  * The class can be used in two modes, depending on the usage parameter:
  *
  * GL_STATIC_DRAW: Call Allocate() with backingStore = NULL. Then call
  * UpdateChunkVertices() with any pointer - the data will be immediately copied
  * to the VBO. This should be used for vertex data that rarely changes.
  *
  * GL_DYNAMIC_DRAW, GL_STREAM_DRAW: Call Allocate() with backingStore pointing
  * at some memory that will remain valid for the lifetime of the CVertexBuffer.
  * This should be used for vertex data that may change every frame.
  * Rendering is expected to occur in two phases:
  *   - "Prepare" phase:
  *       If this chunk is going to be used for rendering during the next Bind phase,
  *       you must call PrepareForRendering().
  *       If the vertex data in backingStore has been modified since the last Bind phase,
  *       you must call UpdateChunkVertices().
  *   - "Bind" phase:
  *       Bind() can be called (multiple times). The vertex data will be uploaded
  *       to the GPU if necessary.
  * It is okay to have multiple prepare/bind cycles per frame (though slightly less
  * efficient), but they must occur sequentially.
  */
 class CVertexBuffer
 {
 	NONCOPYABLE(CVertexBuffer);
 
 public:
 
 	/// VBChunk: describes a portion of this vertex buffer
 	struct VBChunk
 	{
 		/// Owning (parent) vertex buffer
 		CVertexBuffer* m_Owner;
 		/// Start index of this chunk in owner
 		size_t m_Index;
 		/// Number of vertices used by chunk
 		size_t m_Count;
 		/// If UseStreaming() is true, points at the data for this chunk
 		void* m_BackingStore;
 
 		/// If true, the VBO is not consistent with the chunk's backing store
 		/// (and will need to be re-uploaded before rendering with this chunk)
 		bool m_Dirty;
 
 		/// If true, we have been told this chunk is going to be used for
 		/// rendering in the next bind phase and will need to be uploaded
 		bool m_Needed;
 
 	private:
 		// Only CVertexBuffer can construct/delete these
 		// (Other people should use g_VBMan.Allocate, g_VBMan.Release)
 		friend class CVertexBuffer;
 		VBChunk() {}
 		~VBChunk() {}
 	};
 
 public:
 	// constructor, destructor
 	CVertexBuffer(
 		const char* name, const size_t vertexSize,
 		const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic);
 	CVertexBuffer(
 		const char* name, const size_t vertexSize,
 		const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic,
 		const size_t maximumBufferSize);
 	~CVertexBuffer();
 
 	/// Bind to this buffer; return pointer to address required as parameter
 	/// to glVertexPointer ( + etc) calls
 	u8* Bind(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext);
 
+	/// Upload the chunks that are out of date and were marked by PrepareForRendering() (streaming buffers only).
+	void UploadIfNeeded(Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext);
 	/// Unbind any currently-bound buffer, so glVertexPointer etc calls will not attempt to use it
 	static void Unbind(
 		Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext);
 
 	/// Make the vertex data available for the next call to Bind()
 	void PrepareForRendering(VBChunk* chunk);
 
 	/// Update vertex data for given chunk. Transfers the provided data to the actual OpenGL vertex buffer.
 	void UpdateChunkVertices(VBChunk* chunk, void* data);
 
 	size_t GetVertexSize() const { return m_VertexSize; }
 	size_t GetBytesReserved() const;
 	size_t GetBytesAllocated() const;
 
 	/// Returns true if this vertex buffer is compatible with the specified vertex type and intended usage.
 	bool CompatibleVertexType(
 		const size_t vertexSize, const Renderer::Backend::GL::CBuffer::Type type,
 		const bool dynamic) const;
 
 	void DumpStatus() const;
 
 	/**
 	 * Given the usage flags of a buffer that has been (or will be) allocated:
 	 *
 	 * If true, we assume the buffer is going to be modified on every frame,
 	 * so we will re-upload the entire buffer every frame using glMapBuffer.
 	 * This requires the buffer's owner to hold onto its backing store.
 	 *
 	 * If false, we assume it will change rarely, and use direct upload to
 	 * update it incrementally. The backing store can be freed to save memory.
 	 */
 	static bool UseStreaming(const bool dynamic);
 
+	/// Backend buffer shared by all chunks; ownership stays with this CVertexBuffer.
+	Renderer::Backend::GL::CBuffer* GetBuffer() { return m_Buffer.get(); }
 private:
 	friend class CVertexBufferManager;		// allow allocate only via CVertexBufferManager
 
 	/// Try to allocate a buffer of given number of vertices (each of given size),
 	/// and with the given type - return null if no free chunks available
 	VBChunk* Allocate(
 		const size_t vertexSize, const size_t numberOfVertices,
 		const Renderer::Backend::GL::CBuffer::Type type, const bool dynamic,
 		void* backingStore);
 	/// Return given chunk to this buffer
 	void Release(VBChunk* chunk);
 
 	/// Vertex size of this vertex buffer
 	size_t m_VertexSize;
 	/// Number of vertices of above size in this buffer
 	size_t m_MaxVertices;
 	/// List of free chunks in this buffer
 	std::vector<VBChunk*> m_FreeList;
 	/// List of allocated chunks
 	std::vector<VBChunk*> m_AllocList;
 	/// Available free vertices - total of all free vertices in the free list
 	size_t m_FreeVertices;
 
 	std::unique_ptr<Renderer::Backend::GL::CBuffer> m_Buffer;
 
 	bool m_HasNeededChunks;
 };
 
 #endif // INCLUDED_VERTEXBUFFER
Index: ps/trunk/source/renderer/WaterManager.cpp
===================================================================
--- ps/trunk/source/renderer/WaterManager.cpp	(revision 26524)
+++ ps/trunk/source/renderer/WaterManager.cpp	(revision 26525)
@@ -1,1024 +1,1024 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "precompiled.h"
 
 #include "graphics/Terrain.h"
 #include "graphics/TextureManager.h"
 #include "graphics/ShaderManager.h"
 #include "graphics/ShaderProgram.h"
 #include "lib/bits.h"
 #include "lib/timer.h"
 #include "lib/ogl.h"
 #include "lib/tex/tex.h"
 #include "maths/MathUtil.h"
 #include "maths/Vector2D.h"
 #include "ps/CLogger.h"
 #include "ps/CStrInternStatic.h"
 #include "ps/Game.h"
 #include "ps/VideoMode.h"
 #include "ps/World.h"
 #include "renderer/backend/gl/Device.h"
 #include "renderer/Renderer.h"
 #include "renderer/RenderingOptions.h"
 #include "renderer/SceneRenderer.h"
 #include "renderer/WaterManager.h"
 #include "simulation2/Simulation2.h"
 #include "simulation2/components/ICmpWaterManager.h"
 #include "simulation2/components/ICmpRangeManager.h"
 
 #include <algorithm>
 
 struct CoastalPoint
 {
 	CoastalPoint(int idx, CVector2D pos) : index(idx), position(pos) {};
 	int index;
 	CVector2D position;
 };
 
 struct SWavesVertex
 {
 	// vertex position
 	CVector3D m_BasePosition;
 	CVector3D m_ApexPosition;
 	CVector3D m_SplashPosition;
 	CVector3D m_RetreatPosition;
 
 	CVector2D m_PerpVect;
 	u8 m_UV[3];
 
 	// pad to a power of two
 	u8 m_Padding[5];
 };
 cassert(sizeof(SWavesVertex) == 64);
 
 struct WaveObject
 {
 	CVertexBufferManager::Handle m_VBVertices;
 	CBoundingBoxAligned m_AABB;
 	size_t m_Width;
 	float m_TimeDiff;
 };
 
 WaterManager::WaterManager()
 {
 	// water
 	m_RenderWater = false; // disabled until textures are successfully loaded
 	m_WaterHeight = 5.0f;
 
 	m_RefTextureSize = 0;
 
 	m_WaterTexTimer = 0.0;
 
 	m_WindAngle = 0.0f;
 	m_Waviness = 8.0f;
 	m_WaterColor = CColor(0.3f, 0.35f, 0.7f, 1.0f);
 	m_WaterTint = CColor(0.28f, 0.3f, 0.59f, 1.0f);
 	m_Murkiness = 0.45f;
 	m_RepeatPeriod = 16.0f;
 
 	m_WaterEffects = true;
 	m_WaterFancyEffects = false;
 	m_WaterRealDepth = false;
 	m_WaterRefraction = false;
 	m_WaterReflection = false;
 	m_WaterType = L"ocean";
 
 	m_NeedsReloading = false;
 	m_NeedInfoUpdate = true;
 
 	m_MapSize = 0;
 
 	m_updatei0 = 0;
 	m_updatej0 = 0;
 	m_updatei1 = 0;
 	m_updatej1 = 0;
 }
 
 WaterManager::~WaterManager()
 {
 	// Cleanup if the caller messed up
 	UnloadWaterTextures();
 
 	m_ShoreWaves.clear();
 	m_ShoreWavesVBIndices.Reset();
 
 	m_DistanceHeightmap.reset();
 	m_WindStrength.reset();
 
 	m_FancyEffectsFramebuffer.reset();
 	m_RefractionFramebuffer.reset();
 	m_ReflectionFramebuffer.reset();
 
 	m_FancyTexture.reset();
 	m_FancyTextureDepth.reset();
 	m_ReflFboDepthTexture.reset();
 	m_RefrFboDepthTexture.reset();
 }
 
 
 ///////////////////////////////////////////////////////////////////
 // Progressive load of water textures
 int WaterManager::LoadWaterTextures()
 {
 	// TODO: this doesn't need to be progressive-loading any more
 	// (since texture loading is async now)
 
 	wchar_t pathname[PATH_MAX];
 
 	// Load diffuse grayscale images (for non-fancy water)
 	for (size_t i = 0; i < ARRAY_SIZE(m_WaterTexture); ++i)
 	{
 		swprintf_s(pathname, ARRAY_SIZE(pathname), L"art/textures/animated/water/default/diffuse%02d.dds", (int)i+1);
 		CTextureProperties textureProps(pathname);
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_WaterTexture[i] = texture;
 	}
 
 	m_RenderWater = true;
 
 	if (!WillRenderFancyWater())
 		return 0;
 
 #if CONFIG2_GLES
 #warning Fix WaterManager::LoadWaterTextures on GLES
 #else
 	// Load normalmaps (for fancy water)
 	ReloadWaterNormalTextures();
 
 	// Load CoastalWaves
 	{
 		CTextureProperties textureProps(L"art/textures/terrain/types/water/coastalWave.png");
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_WaveTex = texture;
 	}
 
 	// Load Foam
 	{
 		CTextureProperties textureProps(L"art/textures/terrain/types/water/foam.png");
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_FoamTex = texture;
 	}
 
 	// Use screen-sized textures for minimum artifacts.
 	m_RefTextureSize = round_up_to_pow2(g_Renderer.GetHeight());
 
 	Renderer::Backend::GL::CDevice* backendDevice = g_VideoMode.GetBackendDevice();
 
 	// Create reflection texture
 	m_ReflectionTexture = backendDevice->CreateTexture2D("WaterReflectionTexture",
 		Renderer::Backend::Format::R8G8B8A8, m_RefTextureSize, m_RefTextureSize,
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::MIRRORED_REPEAT));
 
 	// Create refraction texture
 	m_RefractionTexture = backendDevice->CreateTexture2D("WaterRefractionTexture",
 		Renderer::Backend::Format::R8G8B8A8, m_RefTextureSize, m_RefTextureSize,
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::MIRRORED_REPEAT));
 
 	// Create depth textures
 	m_ReflFboDepthTexture = backendDevice->CreateTexture2D("WaterReflectionDepthTexture",
 		Renderer::Backend::Format::D32, m_RefTextureSize, m_RefTextureSize,
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::NEAREST,
 			Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 	m_RefrFboDepthTexture = backendDevice->CreateTexture2D("WaterRefractionDepthTexture",
 		Renderer::Backend::Format::D32, m_RefTextureSize, m_RefTextureSize,
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::NEAREST,
 			Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 	Resize();
 
 	// Create the water framebuffers
 
 	m_ReflectionFramebuffer = backendDevice->CreateFramebuffer("ReflectionFramebuffer",
 		m_ReflectionTexture.get(), m_ReflFboDepthTexture.get(), CColor(0.5f, 0.5f, 1.0f, 0.0f));
 	if (!m_ReflectionFramebuffer)
 	{
 		g_RenderingOptions.SetWaterReflection(false);
 		UpdateQuality();
 	}
 
 	m_RefractionFramebuffer = backendDevice->CreateFramebuffer("RefractionFramebuffer",
 		m_RefractionTexture.get(), m_RefrFboDepthTexture.get(), CColor(1.0f, 0.0f, 0.0f, 0.0f));
 	if (!m_RefractionFramebuffer)
 	{
 		g_RenderingOptions.SetWaterRefraction(false);
 		UpdateQuality();
 	}
 
 	m_FancyEffectsFramebuffer = backendDevice->CreateFramebuffer("FancyEffectsFramebuffer",
 		m_FancyTexture.get(), m_FancyTextureDepth.get());
 	if (!m_FancyEffectsFramebuffer)
 	{
 		g_RenderingOptions.SetWaterRefraction(false);
 		UpdateQuality();
 	}
 #endif
 	return 0;
 }
 
 
 ///////////////////////////////////////////////////////////////////
 // Resize: Updates the fancy water textures.
 void WaterManager::Resize()
 {
 	Renderer::Backend::GL::CDevice* backendDevice = g_VideoMode.GetBackendDevice();
 
 	// Create the Fancy Effects texture
 	m_FancyTexture = backendDevice->CreateTexture2D("WaterFancyTexture",
 		Renderer::Backend::Format::R8G8B8A8, g_Renderer.GetWidth(), g_Renderer.GetHeight(),
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::REPEAT));
 
 	m_FancyTextureDepth = backendDevice->CreateTexture2D("WaterFancyDepthTexture",
 		Renderer::Backend::Format::D32, g_Renderer.GetWidth(), g_Renderer.GetHeight(),
 		Renderer::Backend::Sampler::MakeDefaultSampler(
 			Renderer::Backend::Sampler::Filter::LINEAR,
 			Renderer::Backend::Sampler::AddressMode::REPEAT));
 }
 
 void WaterManager::ReloadWaterNormalTextures()
 {
 	wchar_t pathname[PATH_MAX];
 	for (size_t i = 0; i < ARRAY_SIZE(m_NormalMap); ++i)
 	{
 		swprintf_s(pathname, ARRAY_SIZE(pathname), L"art/textures/animated/water/%ls/normal00%02d.png", m_WaterType.c_str(), static_cast<int>(i) + 1);
 		CTextureProperties textureProps(pathname);
 		textureProps.SetAddressMode(
 			Renderer::Backend::Sampler::AddressMode::REPEAT);
 		textureProps.SetAnisotropicFilter(true);
 
 		CTexturePtr texture = g_Renderer.GetTextureManager().CreateTexture(textureProps);
 		texture->Prefetch();
 		m_NormalMap[i] = texture;
 	}
 }
 
 ///////////////////////////////////////////////////////////////////
 // Unload water textures
 void WaterManager::UnloadWaterTextures()
 {
 	for (size_t i = 0; i < ARRAY_SIZE(m_WaterTexture); i++)
 		m_WaterTexture[i].reset();
 
 	for (size_t i = 0; i < ARRAY_SIZE(m_NormalMap); i++)
 		m_NormalMap[i].reset();
 
 	m_RefractionFramebuffer.reset();
 	m_ReflectionFramebuffer.reset();
 	m_ReflectionTexture.reset();
 	m_RefractionTexture.reset();
 }
 
 // One single-axis pass of the shore distance computation.
 //
 // Walks every line of the SideSize x SideSize heightmap: rows when Transpose
 // is false, columns when it is true. A vertex counts as shore when its scaled
 // height is at or above waterHeight. Each distanceMap entry is lowered to the
 // smaller of
 //  - the number of cells since the last shore vertex on the line, capped at
 //    maxLevel, and
 //  - the number of cells to the lookahead position, which stops at the next
 //    shore vertex, at the end of the line, or one past the maxLevel window.
 // Entries are only ever lowered, never raised.
 template<bool Transpose>
 static inline void ComputeDirection(float* distanceMap, const u16* heightmap, float waterHeight, size_t SideSize, size_t maxLevel)
 {
 	for (size_t line = 0; line < SideSize; ++line)
 	{
 		// Flat index of the cell `offset` steps along the current line.
 		const auto cellIndex = [&](const size_t offset)
 		{
 			return Transpose ? offset*SideSize + line : line*SideSize + offset;
 		};
 		// Whether that cell is at or above the water level.
 		const auto isShore = [&](const size_t offset)
 		{
 			return HEIGHT_SCALE * heightmap[cellIndex(offset)] >= waterHeight;
 		};
 
 		// Cells since the last shore vertex behind us (capped at maxLevel).
 		size_t behind = isShore(0) ? 0 : maxLevel;
 		// Position of the next candidate shore vertex in front of us.
 		size_t ahead = behind > 0 ? 1 : 0;
 
 		// Slide `ahead` forward over water cells, no further than maxLevel
 		// past `from` and never beyond the end of the line.
 		const auto advanceLookahead = [&](const size_t from)
 		{
 			while (ahead <= from + maxLevel && ahead < SideSize && !isShore(ahead))
 				++ahead;
 		};
 		advanceLookahead(0);
 
 		for (size_t offset = 0; offset < SideSize; ++offset)
 		{
 			// Distance to the previous shore vertex.
 			behind = isShore(offset) ? 0 : std::min(behind + 1, maxLevel);
 
 			// Keep the lookahead strictly in front of a shore cell we are standing on.
 			if (ahead == offset)
 				++ahead;
 			advanceLookahead(offset);
 
 			// Keep whichever is closer: what is already stored, the shore
 			// behind us, or the lookahead position in front of us.
 			float& distance = distanceMap[cellIndex(offset)];
 			distance = std::min(distance, (float)std::min(ahead - offset, behind));
 		}
 	}
 }
 
 ///////////////////////////////////////////////////////////////////
 // Calculate our binary heightmap from the terrain heightmap.
 void WaterManager::RecomputeDistanceHeightmap()
 {
 	// Without a terrain heightmap there is nothing to measure against.
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	u16* heightmap = terrain ? terrain->GetHeightMap() : nullptr;
 	if (!heightmap)
 		return;
 
 	const size_t sideSize = m_MapSize;
 	const size_t cellCount = sideSize * sideSize;
 
 	// How far (in cells) we look for a shore point. Going further costs more
 	// and adds nothing interesting.
 	const size_t maxLevel = 5;
 
 	// The map is allocated lazily and starts out at the maximum distance.
 	// An already existing map is reused as is.
 	if (!m_DistanceHeightmap)
 	{
 		m_DistanceHeightmap = std::make_unique<float[]>(cellCount);
 		std::fill(m_DistanceHeightmap.get(), m_DistanceHeightmap.get() + cellCount, static_cast<float>(maxLevel));
 	}
 
 	// Build a manhattan-style distance map from one pass along each axis.
 	// This could be limited to the neighbourhood of the coast, but that is
 	// probably not necessary.
 	auto* const distances = m_DistanceHeightmap.get();
 	ComputeDirection<false>(distances, heightmap, m_WaterHeight, sideSize, maxLevel);
 	ComputeDirection<true>(distances, heightmap, m_WaterHeight, sideSize, maxLevel);
 }
 
 // This requires m_DistanceHeightmap to be defined properly.
 // Rebuilds the shore wave geometry: clears m_ShoreWaves and the shared index
 // chunk m_ShoreWavesVBIndices, then (unless it bails out early) generates one
 // WaveObject with its own vertex chunk per accepted stretch of coastline.
 //
 // Steps:
 //  1. collect the grid points whose m_DistanceHeightmap value is in (0.5, 1.5),
 //  2. link neighbouring points into chains and keep chains of more than 10 points,
 //  3. smooth the chains,
 //  4. slide a window of up to 14 points along every chain and emit a wave for
 //     each window that passes the direction/depth checks.
 //
 // Returns without generating anything when the map size is 0, when there is
 // no terrain heightmap, or when m_Waviness is below 5 and the water type is
 // not "ocean".
 void WaterManager::CreateWaveMeshes()
 {
 	if (m_MapSize == 0)
 		return;
 
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	if (!terrain || !terrain->GetHeightMap())
 		return;
 
 	// Previous waves are always discarded, even if no new ones get created below.
 	m_ShoreWaves.clear();
 	m_ShoreWavesVBIndices.Reset();
 
 	if (m_Waviness < 5.0f && m_WaterType != L"ocean")
 		return;
 
 	size_t SideSize = m_MapSize;
 
 	// First step: get the points near the coast.
 	// Points are stored as flat indices z*SideSize + x; the outermost ring of
 	// the map is skipped so that all 8 neighbours of a point stay on the map.
 	std::set<int> CoastalPointsSet;
 	for (size_t z = 1; z < SideSize-1; ++z)
 		for (size_t x = 1; x < SideSize-1; ++x)
 			// get the points not on the shore but near it, ocean-side
 			if (m_DistanceHeightmap[z*m_MapSize + x] > 0.5f && m_DistanceHeightmap[z*m_MapSize + x] < 1.5f)
 				CoastalPointsSet.insert((z)*SideSize + x);
 
 	// Second step: create chains out of those coastal points.
 	// Offsets of the 8 neighbours of a grid point, as { dx, dy }.
 	static const int around[8][2] = { { -1,-1 }, { -1,0 }, { -1,1 }, { 0,1 }, { 1,1 }, { 1,0 }, { 1,-1 }, { 0,-1 } };
 
 	std::vector<std::deque<CoastalPoint> > CoastalPointsChains;
 	while (!CoastalPointsSet.empty())
 	{
 		// Take the smallest remaining index as the seed of a new chain.
 		int index = *(CoastalPointsSet.begin());
 		int x = index % SideSize;
 		int y = (index - x ) / SideSize;
 
 		std::deque<CoastalPoint> Chain;
 
 		// Chain positions are the grid coordinates multiplied by 4
 		// (presumably the terrain tile size in world units - TODO confirm).
 		Chain.push_front(CoastalPoint(index,CVector2D(x*4,y*4)));
 
 		// Erase us.
 		CoastalPointsSet.erase(CoastalPointsSet.begin());
 
 		// We're our starter points. At most we can have 2 points close to us.
 		// We'll pick the first one and look for its neighbors (he can only have one new)
 		// Up until we either reach the end of the chain, or ourselves.
 		// Then go down the other direction if there is any.
 		int neighbours[2] = { -1, -1 };
 		int nbNeighb = 0;
 		for (int i = 0; i < 8; ++i)
 		{
 			if (CoastalPointsSet.count(x + around[i][0] + (y + around[i][1])*SideSize))
 			{
 				if (nbNeighb < 2)
 					neighbours[nbNeighb] = x + around[i][0] + (y + around[i][1])*SideSize;
 				++nbNeighb;
 			}
 		}
 		// A seed with more than two coastal neighbours is dropped without
 		// starting a chain (it has already been erased from the set).
 		if (nbNeighb > 2)
 			continue;
 
 		// Walk away from the seed: direction 0 grows the back of the chain,
 		// direction 1 grows the front.
 		for (int i = 0; i < 2; ++i)
 		{
 			if (neighbours[i] == -1)
 				continue;
 			// Move to our neighboring point
 			int xx = neighbours[i] % SideSize;
 			int yy = (neighbours[i] - xx ) / SideSize;
 			int indexx = xx + yy*SideSize;
 			int endedChain = false;
 
 			if (i == 0)
 				Chain.push_back(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 			else
 				Chain.push_front(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 
 			// If there's a loop we'll be the "other" neighboring point already so check for that.
 			// We'll readd at the end/front the other one to have full squares.
 			if (CoastalPointsSet.count(indexx) == 0)
 				break;
 
 			CoastalPointsSet.erase(indexx);
 
 			// Start checking from there.
 			while(!endedChain)
 			{
 				bool found = false;
 				nbNeighb = 0;
 				for (int p = 0; p < 8; ++p)
 				{
 					if (CoastalPointsSet.count(xx+around[p][0] + (yy + around[p][1])*SideSize))
 					{
 						// NOTE(review): nbNeighb is reset to 0 before this loop and the
 						// loop breaks right after the first increment, so this branch
 						// looks unreachable - confirm.
 						if (nbNeighb >= 2)
 						{
 							CoastalPointsSet.erase(xx + yy*SideSize);
 							continue;
 						}
 						++nbNeighb;
 						// We've found a new point around us.
 						// Move there
 						xx = xx + around[p][0];
 						yy = yy + around[p][1];
 						indexx = xx + yy*SideSize;
 						if (i == 0)
 							Chain.push_back(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 						else
 							Chain.push_front(CoastalPoint(indexx,CVector2D(xx*4,yy*4)));
 						CoastalPointsSet.erase(xx + yy*SideSize);
 						found = true;
 						break;
 					}
 				}
 				if (!found)
 					endedChain = true;
 			}
 		}
 		// Only chains with more than 10 points are kept.
 		if (Chain.size() > 10)
 			CoastalPointsChains.push_back(Chain);
 	}
 
 	// (optional) third step: Smooth chains out.
 	// This is also really dumb.
 	for (size_t i = 0; i < CoastalPointsChains.size(); ++i)
 	{
 		// Bump 1 for smoother.
 		for (int p = 0; p < 3; ++p)
 		{
 			// Every interior point moves halfway towards the midpoint of its two
 			// neighbours; the update is in place, so later points already see the
 			// smoothed position of the previous one.
 			for (size_t j = 1; j < CoastalPointsChains[i].size()-1; ++j)
 			{
 				CVector2D realPos = CoastalPointsChains[i][j-1].position + CoastalPointsChains[i][j+1].position;
 
 				CoastalPointsChains[i][j].position = (CoastalPointsChains[i][j].position + realPos/2.0f)/2.0f;
 			}
 		}
 	}
 
 	// Fourth step: create waves themselves, using those chains. We basically create subchains.
 	GLushort waveSizes = 14;	// maximal size in width.
 
 	// Construct indices buffer (we can afford one for all of them)
 	// Each wave is a grid with rows of 9 vertices; consecutive rows are joined
 	// by 7 quads of two triangles each, giving (waveSizes - 1) * 7 * 6 indices.
 	// NOTE(review): with rect < 7 only the first 8 vertices of each row are
 	// referenced by the indices - confirm this is intended.
 	std::vector<GLushort> water_indices;
 	for (GLushort a = 0; a < waveSizes - 1; ++a)
 	{
 		for (GLushort rect = 0; rect < 7; ++rect)
 		{
 			water_indices.push_back(a * 9 + rect);
 			water_indices.push_back(a * 9 + 9 + rect);
 			water_indices.push_back(a * 9 + 1 + rect);
 			water_indices.push_back(a * 9 + 9 + rect);
 			water_indices.push_back(a * 9 + 10 + rect);
 			water_indices.push_back(a * 9 + 1 + rect);
 		}
 	}
 	// Generic indexes, max-length
 	m_ShoreWavesVBIndices = g_VBMan.AllocateChunk(
 		sizeof(GLushort), water_indices.size(),
 		Renderer::Backend::GL::CBuffer::Type::INDEX, false,
 		nullptr, CVertexBufferManager::Group::WATER);
 	m_ShoreWavesVBIndices->m_Owner->UpdateChunkVertices(m_ShoreWavesVBIndices.Get(), &water_indices[0]);
 
 	// Time offset given to the next wave (stored in m_TimeDiff); starts at a
 	// random value and grows by a random amount for every wave created.
 	float diff = (rand() % 50) / 5.0f;
 
 	std::vector<SWavesVertex> vertices, reversed;
 	for (size_t i = 0; i < CoastalPointsChains.size(); ++i)
 	{
 		// NOTE: for chains shorter than waveSizes the unsigned subtraction in
 		// the loop condition wraps around; the size check at the top of the
 		// body is what actually ends the loop in that case.
 		for (size_t j = 0; j < CoastalPointsChains[i].size()-waveSizes; ++j)
 		{
 			if (CoastalPointsChains[i].size()- 1 - j < waveSizes)
 				break;
 
 			GLushort width = waveSizes;
 
 			// First pass to get some parameters out.
 			float outmost = 0.0f;	// how far to move on the shore.
 			float avgDepth = 0.0f;
 			// Side of the chain the wave is built on; flipped to -1 (and kept
 			// there for the rest of this wave) by the ground level test below.
 			int sign = 1;
 			CVector2D firstPerp(0,0), perp(0,0), lastPerp(0,0);
 			for (GLushort a = 0; a < waveSizes;++a)
 			{
 				lastPerp = perp;
 				perp = CVector2D(0,0);
 				int nb = 0;
 				CVector2D pos = CoastalPointsChains[i][j+a].position;
 				CVector2D posPlus;
 				CVector2D posMinus;
 				// Average the chain direction over the available neighbours,
 				// then rotate it by 90 degrees to get the perpendicular.
 				if (a > 0)
 				{
 					++nb;
 					posMinus = CoastalPointsChains[i][j+a-1].position;
 					perp += pos-posMinus;
 				}
 				if (a < waveSizes-1)
 				{
 					++nb;
 					posPlus = CoastalPointsChains[i][j+a+1].position;
 					perp += posPlus-pos;
 				}
 				perp /= nb;
 				perp = CVector2D(-perp.Y,perp.X).Normalized();
 
 				if (a == 0)
 					firstPerp = perp;
 
 				// Cut the wave short where the coastline turns too sharply, both
 				// relative to the previous point and to the first point.
 				if ( a > 1 && perp.Dot(lastPerp) < 0.90f && perp.Dot(firstPerp) < 0.70f)
 				{
 					width = a+1;
 					break;
 				}
 
 				// If the ground 1.5 units along the perpendicular is above water,
 				// the perpendicular points at the land: flip the side.
 				if (terrain->GetExactGroundLevel(pos.X+perp.X*1.5f, pos.Y+perp.Y*1.5f) > m_WaterHeight)
 					sign = -1;
 
 				// Ground height relative to the water, 20 units out.
 				avgDepth += terrain->GetExactGroundLevel(pos.X+sign*perp.X*20.0f, pos.Y+sign*perp.Y*20.0f) - m_WaterHeight;
 
 				// Scan from -2.0 towards 0 in steps of 0.2 for the first spot whose
 				// ground is between 0 and 0.6 above the water level.
 				float localOutmost = -2.0f;
 				while (localOutmost < 0.0f)
 				{
 					float depth = terrain->GetExactGroundLevel(pos.X+sign*perp.X*localOutmost, pos.Y+sign*perp.Y*localOutmost) - m_WaterHeight;
 					if (depth < 0.0f || depth > 0.6f)
 						localOutmost += 0.2f;
 					else
 						break;
 				}
 
 				outmost += localOutmost;
 			}
 			// Too narrow: skip ahead (the loop's ++j adds one more).
 			if (width < 5)
 			{
 				j += 6;
 				continue;
 			}
 
 			outmost /= width;
 
 			if (outmost > -0.5f)
 			{
 				j += 3;
 				continue;
 			}
 			outmost = -2.5f + outmost * m_Waviness/10.0f;
 
 			avgDepth /= width;
 
 			// Reject stretches whose ground 20 units out is not deep enough on average.
 			if (avgDepth > -1.3f)
 			{
 				j += 3;
 				continue;
 			}
 			// we passed the checks, we can create a wave of size "width".
 
 			std::unique_ptr<WaveObject> shoreWave = std::make_unique<WaveObject>();
 			vertices.clear();
 			vertices.reserve(9 * width);
 
 			shoreWave->m_Width = width;
 			shoreWave->m_TimeDiff = diff;
 			diff += (rand() % 100) / 25.0f + 4.0f;
 
 			// Second pass: emit a row of 9 vertices for each chain point of the wave.
 			for (GLushort a = 0; a < width;++a)
 			{
 				perp = CVector2D(0,0);
 				int nb = 0;
 				CVector2D pos = CoastalPointsChains[i][j+a].position;
 				CVector2D posPlus;
 				CVector2D posMinus;
 				if (a > 0)
 				{
 					++nb;
 					posMinus = CoastalPointsChains[i][j+a-1].position;
 					perp += pos-posMinus;
 				}
 				// NOTE: this tests against waveSizes rather than width, so the last
 				// row of a shortened wave still uses the chain point just after it.
 				if (a < waveSizes-1)
 				{
 					++nb;
 					posPlus = CoastalPointsChains[i][j+a+1].position;
 					perp += posPlus-pos;
 				}
 				perp /= nb;
 				perp = CVector2D(-perp.Y,perp.X).Normalized();
 
 				SWavesVertex point[9];
 
 				float baseHeight = 0.04f;
 
 				// sideNess is 0 at both ends of the wave and reaches 1 three points
 				// in; it scales the height profiles (and the splash offset) below.
 				float halfWidth = (width-1.0f)/2.0f;
 				float sideNess = sqrtf(Clamp( (halfWidth - fabsf(a - halfWidth)) / 3.0f, 0.0f, 1.0f));
 
 				// UV: U is the position along the wave, V runs from 8 down to 0 across it.
 				point[0].m_UV[0] = a; point[0].m_UV[1] = 8;
 				point[1].m_UV[0] = a; point[1].m_UV[1] = 7;
 				point[2].m_UV[0] = a; point[2].m_UV[1] = 6;
 				point[3].m_UV[0] = a; point[3].m_UV[1] = 5;
 				point[4].m_UV[0] = a; point[4].m_UV[1] = 4;
 				point[5].m_UV[0] = a; point[5].m_UV[1] = 3;
 				point[6].m_UV[0] = a; point[6].m_UV[1] = 2;
 				point[7].m_UV[0] = a; point[7].m_UV[1] = 1;
 				point[8].m_UV[0] = a; point[8].m_UV[1] = 0;
 
 				point[0].m_PerpVect = perp;
 				point[1].m_PerpVect = perp;
 				point[2].m_PerpVect = perp;
 				point[3].m_PerpVect = perp;
 				point[4].m_PerpVect = perp;
 				point[5].m_PerpVect = perp;
 				point[6].m_PerpVect = perp;
 				point[7].m_PerpVect = perp;
 				point[8].m_PerpVect = perp;
 
 				// Per-vertex offsets along the perpendicular for the four key
 				// positions (base, apex, splash, retreat)...
 				static const float perpT1[9] = { 6.0f, 6.05f, 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 9.7f };
 				static const float perpT2[9] = { 2.0f, 2.1f,  2.2f, 2.3f, 2.4f, 3.0f, 3.3f, 3.6f, 9.5f };
 				static const float perpT3[9] = { 1.1f, 0.7f, -0.2f, 0.0f, 0.6f, 1.3f, 2.2f, 3.6f, 9.0f };
 				static const float perpT4[9] = { 2.0f, 2.1f,  1.2f, 1.5f, 1.7f, 1.9f, 2.7f, 3.8f, 9.0f };
 
 				// ...and the matching height profiles.
 				static const float heightT1[9] = { 0.0f, 0.2f, 0.5f, 0.8f, 0.9f, 0.85f, 0.6f, 0.2f, 0.0 };
 				static const float heightT2[9] = { -0.8f, -0.4f, 0.0f, 0.1f, 0.1f, 0.03f, 0.0f, 0.0f, 0.0 };
 				static const float heightT3[9] = { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0 };
 
 				// All four positions sit on whichever is higher, the water surface
 				// or the terrain (+0.05), plus baseHeight and the scaled profile.
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT1[t]+outmost),
 																			pos.Y+sign*perp.Y*(perpT1[t]+outmost));
 					point[t].m_BasePosition = CVector3D(pos.X+sign*perp.X*(perpT1[t]+outmost), baseHeight + heightT1[t]*sideNess + std::max(m_WaterHeight,terrHeight),
 														pos.Y+sign*perp.Y*(perpT1[t]+outmost));
 				}
 				// NOTE(review): the apex uses heightT1, the same profile as the base
 				// position - confirm this is intended.
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT2[t]+outmost),
 																			pos.Y+sign*perp.Y*(perpT2[t]+outmost));
 					point[t].m_ApexPosition = CVector3D(pos.X+sign*perp.X*(perpT2[t]+outmost), baseHeight + heightT1[t]*sideNess + std::max(m_WaterHeight,terrHeight),
 														pos.Y+sign*perp.Y*(perpT2[t]+outmost));
 				}
 				// The splash position is the only one that scales outmost by sideNess.
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT3[t]+outmost*sideNess),
 																			pos.Y+sign*perp.Y*(perpT3[t]+outmost*sideNess));
 					point[t].m_SplashPosition = CVector3D(pos.X+sign*perp.X*(perpT3[t]+outmost*sideNess), baseHeight + heightT2[t]*sideNess + std::max(m_WaterHeight,terrHeight), pos.Y+sign*perp.Y*(perpT3[t]+outmost*sideNess));
 				}
 				for (size_t t = 0; t < 9; ++t)
 				{
 					float terrHeight = 0.05f + terrain->GetExactGroundLevel(pos.X+sign*perp.X*(perpT4[t]+outmost),
 																			pos.Y+sign*perp.Y*(perpT4[t]+outmost));
 					point[t].m_RetreatPosition = CVector3D(pos.X+sign*perp.X*(perpT4[t]+outmost), baseHeight + heightT3[t]*sideNess + std::max(m_WaterHeight,terrHeight),
 														   pos.Y+sign*perp.Y*(perpT4[t]+outmost));
 				}
 
 				// The row is stored in reverse order: point[8] first, point[0] last.
 				vertices.push_back(point[8]);
 				vertices.push_back(point[7]);
 				vertices.push_back(point[6]);
 				vertices.push_back(point[5]);
 				vertices.push_back(point[4]);
 				vertices.push_back(point[3]);
 				vertices.push_back(point[2]);
 				vertices.push_back(point[1]);
 				vertices.push_back(point[0]);
 
 				// Grow the bounding box from a handful of representative positions.
 				shoreWave->m_AABB += point[8].m_SplashPosition;
 				shoreWave->m_AABB += point[8].m_BasePosition;
 				shoreWave->m_AABB += point[0].m_SplashPosition;
 				shoreWave->m_AABB += point[0].m_BasePosition;
 				shoreWave->m_AABB += point[4].m_ApexPosition;
 			}
 
 			if (sign == 1)
 			{
 				// Let's do some fancy reversing.
 				// The order of the rows is reversed; the 9 vertices inside each
 				// row keep their order.
 				reversed.clear();
 				reversed.reserve(vertices.size());
 				for (int a = width - 1; a >= 0; --a)
 				{
 					for (size_t t = 0; t < 9; ++t)
 						reversed.push_back(vertices[a * 9 + t]);
 				}
 				std::swap(vertices, reversed);
 			}
 			// Together with the loop's ++j the next window starts width/2 points
 			// further along, so consecutive waves overlap.
 			j += width/2-1;
 
 			shoreWave->m_VBVertices = g_VBMan.AllocateChunk(
 				sizeof(SWavesVertex), vertices.size(),
 				Renderer::Backend::GL::CBuffer::Type::VERTEX, false,
 				nullptr, CVertexBufferManager::Group::WATER);
 			shoreWave->m_VBVertices->m_Owner->UpdateChunkVertices(shoreWave->m_VBVertices.Get(), &vertices[0]);
 
 			m_ShoreWaves.emplace_back(std::move(shoreWave));
 		}
 	}
 }
 
 // Draws every shore wave whose bounding box is visible in the given frustum
 // into m_FancyEffectsFramebuffer, using the water_waves effect, and then
 // switches back to the device's current backbuffer.
 // Does nothing when m_WaterFancyEffects is off; not implemented on GLES.
 void WaterManager::RenderWaves(
 	Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 	const CFrustum& frustrum)
 {
 	GPU_SCOPED_LABEL(deviceCommandContext, "Render Waves");
 #if CONFIG2_GLES
 	UNUSED2(frustrum);
 	#warning Fix WaterManager::RenderWaves on GLES
 #else
 	if (!m_WaterFancyEffects)
 		return;
 
 	// The waves are rendered into their own, freshly cleared target.
 	deviceCommandContext->SetFramebuffer(m_FancyEffectsFramebuffer.get());
 	deviceCommandContext->ClearFramebuffer();
 
 	CShaderTechniquePtr tech = g_Renderer.GetShaderManager().LoadEffect(str_water_waves);
 	tech->BeginPass();
 	deviceCommandContext->SetGraphicsPipelineState(
 		tech->GetGraphicsPipelineStateDesc());
 	const CShaderProgramPtr& shader = tech->GetShader();
 
 	m_WaveTex->UploadBackendTextureIfNeeded(deviceCommandContext);
 	m_FoamTex->UploadBackendTextureIfNeeded(deviceCommandContext);
 	shader->BindTexture(str_waveTex, m_WaveTex->GetBackendTexture());
 	shader->BindTexture(str_foamTex, m_FoamTex->GetBackendTexture());
 
 	// Uniforms shared by all waves.
 	shader->Uniform(str_time, (float)m_WaterTexTimer);
 	shader->Uniform(str_transform, g_Renderer.GetSceneRenderer().GetViewCamera().GetViewProjection());
 
 	for (size_t a = 0; a < m_ShoreWaves.size(); ++a)
 	{
 		// Skip waves whose bounding box is outside the frustum.
 		if (!frustrum.IsBoxVisible(m_ShoreWaves[a]->m_AABB))
 			continue;
 
 		CVertexBuffer::VBChunk* VBchunk = m_ShoreWaves[a]->m_VBVertices.Get();
 		SWavesVertex* base = (SWavesVertex*)VBchunk->m_Owner->Bind(deviceCommandContext);
 
 		// setup data pointers
 		// All attributes are interleaved in SWavesVertex, hence the common stride.
 		GLsizei stride = sizeof(SWavesVertex);
 		shader->VertexPointer(3, GL_FLOAT, stride, &base[VBchunk->m_Index].m_BasePosition);
 		shader->TexCoordPointer(GL_TEXTURE0, 2, GL_UNSIGNED_BYTE, stride, &base[VBchunk->m_Index].m_UV);
 		//	NormalPointer(gl_FLOAT, stride, &base[m_VBWater->m_Index].m_UV)
 		glVertexAttribPointerARB(2, 2, GL_FLOAT, GL_FALSE, stride, &base[VBchunk->m_Index].m_PerpVect);	// replaces commented above because my normal is vec2
 		shader->VertexAttribPointer(str_a_apexPosition, 3, GL_FLOAT, false, stride, &base[VBchunk->m_Index].m_ApexPosition);
 		shader->VertexAttribPointer(str_a_splashPosition, 3, GL_FLOAT, false, stride, &base[VBchunk->m_Index].m_SplashPosition);
 		shader->VertexAttribPointer(str_a_retreatPosition, 3, GL_FLOAT, false, stride, &base[VBchunk->m_Index].m_RetreatPosition);
 
 		shader->AssertPointersBound();
 
 		// Per-wave uniforms.
 		shader->Uniform(str_translation, m_ShoreWaves[a]->m_TimeDiff);
 		shader->Uniform(str_width, (int)m_ShoreWaves[a]->m_Width);
 
 		// All waves share one index chunk; (m_Width - 1) * (7 * 6) indices are
 		// drawn, i.e. 7 quads of 6 indices for each pair of consecutive rows.
-		u8* indexBase = m_ShoreWavesVBIndices->m_Owner->Bind(deviceCommandContext);
-		glDrawElements(GL_TRIANGLES, (GLsizei) (m_ShoreWaves[a]->m_Width-1)*(7*6),
-					   GL_UNSIGNED_SHORT, indexBase + sizeof(u16)*(m_ShoreWavesVBIndices->m_Index));
+		m_ShoreWavesVBIndices->m_Owner->UploadIfNeeded(deviceCommandContext);
+		deviceCommandContext->SetIndexBuffer(m_ShoreWavesVBIndices->m_Owner->GetBuffer());
+		deviceCommandContext->DrawIndexed(m_ShoreWavesVBIndices->m_Index, (m_ShoreWaves[a]->m_Width - 1) * (7 * 6), 0);
 
 		// NOTE(review): no draw call follows this second translation value
 		// before the next iteration overwrites it - confirm whether a second
 		// draw was intended here.
 		shader->Uniform(str_translation, m_ShoreWaves[a]->m_TimeDiff + 6.0f);
 
 		// TODO: figure out why this doesn't work.
 		//g_Renderer.m_Stats.m_DrawCalls++;
 		//g_Renderer.m_Stats.m_WaterTris += m_ShoreWaves_VBIndices->m_Count / 3;
 
 		CVertexBuffer::Unbind(deviceCommandContext);
 	}
 	tech->EndPass();
 	// Hand rendering back to the current backbuffer.
 	deviceCommandContext->SetFramebuffer(
 		deviceCommandContext->GetDevice()->GetCurrentBackbuffer());
 #endif
 }
 
 // Recomputes all map-derived water data; does nothing while the map size is 0.
 void WaterManager::RecomputeWaterData()
 {
 	if (m_MapSize != 0)
 	{
 		// The distance heightmap comes first: the wave meshes are built from it.
 		RecomputeDistanceHeightmap();
 		RecomputeWindStrength();
 		CreateWaveMeshes();
 	}
 }
 
 ///////////////////////////////////////////////////////////////////
 // Calculate the strength of the wind at a given point on the map.
 // Fills m_WindStrength (m_MapSize x m_MapSize floats, indexed Y * m_MapSize + X).
 // Starting points are placed along one or two map edges chosen from the wind
 // angle; each point is then stepped across the map, its strength being
 // adjusted by the height difference at every step and written to each cell
 // it visits.
 void WaterManager::RecomputeWindStrength()
 {
 	if (m_MapSize <= 0)
 		return;
 
 	// Allocated lazily; std::make_unique<float[]> value-initializes the array,
 	// so it starts out as all zeros.
 	if (!m_WindStrength)
 		m_WindStrength = std::make_unique<float[]>(m_MapSize * m_MapSize);
 
 	CTerrain* terrain = g_Game->GetWorld()->GetTerrain();
 	if (!terrain || !terrain->GetHeightMap())
 		return;
 
 	CVector2D windDir = CVector2D(cos(m_WindAngle), sin(m_WindAngle));
 
 	// windX/windY are the negated wind direction components, scaled by
 	// stepSize and rounded; their signs decide the starting edges and the
 	// direction of travel below.
 	int stepSize = 10;
 	ssize_t windX = -round(stepSize * windDir.X);
 	ssize_t windY = -round(stepSize * windDir.Y);
 
 	// A travelling sample: current grid position and the strength it carries.
 	struct SWindPoint {
 		SWindPoint(size_t x, size_t y, float strength) : X(x), Y(y), windStrength(strength) {}
 		ssize_t X;
 		ssize_t Y;
 		float windStrength;
 	};
 
 	std::vector<SWindPoint> startingPoints;
 	std::vector<std::pair<int, int>> movement; // Every increment, move each starting point by all of these.
 
 	// Compute starting points (one or two edges of the map) and how much to move each computation increment.
 	if (fabs(windDir.X) < 0.01f)
 	{
 		// (Almost) no X component: start on one Y edge and move along Y only.
 		movement.emplace_back(0, windY > 0.f ? 1 : -1);
 		startingPoints.reserve(m_MapSize);
 		size_t start = windY > 0 ? 0 : m_MapSize - 1;
 		for (size_t x = 0; x < m_MapSize; ++x)
 			startingPoints.emplace_back(x, start, 0.f);
 	}
 	else if (fabs(windDir.Y) < 0.01f)
 	{
 		// (Almost) no Y component: start on one X edge and move along X only.
 		movement.emplace_back(windX > 0.f ? 1 : - 1, 0);
 		startingPoints.reserve(m_MapSize);
 		size_t start = windX > 0 ? 0 : m_MapSize - 1;
 		for (size_t z = 0; z < m_MapSize; ++z)
 			startingPoints.emplace_back(start, z, 0.f);
 	}
 	else
 	{
 		// Diagonal wind: start from two adjacent edges.
 		startingPoints.reserve(m_MapSize * 2);
 		// Points along X.
 		size_t start = windY > 0 ? 0 : m_MapSize - 1;
 		for (size_t x = 0; x < m_MapSize; ++x)
 			startingPoints.emplace_back(x, start, 0.f);
 		// Points along Z, avoid repeating the corner point.
 		start = windX > 0 ? 0 : m_MapSize - 1;
 		if (windY > 0)
 			for (size_t z = 1; z < m_MapSize; ++z)
 				startingPoints.emplace_back(start, z, 0.f);
 		else
 			for (size_t z = 0; z < m_MapSize-1; ++z)
 				startingPoints.emplace_back(start, z, 0.f);
 
 		// Compute movement array.
 		// (windX, windY) is decomposed into unit steps: diagonal steps while
 		// both components are non-zero, then straight steps for the remainder.
 		// The ssize_t pair is converted to pair<int, int> on push_back.
 		movement.reserve(std::max(std::abs(windX),std::abs(windY)));
 		while (windX != 0 || windY != 0)
 		{
 			std::pair<ssize_t, ssize_t> move = {
 				windX == 0 ? 0 : windX > 0 ? +1 : -1,
 				windY == 0 ? 0 : windY > 0 ? +1 : -1
 			};
 			windX -= move.first;
 			windY -= move.second;
 			movement.push_back(move);
 		}
 	}
 
 	// We have all starting points ready, move them all until the map is covered.
 	for (SWindPoint& point : startingPoints)
 	{
 		// Starting velocity is 1.0 unless in shallow water.
 		m_WindStrength[point.Y * m_MapSize + point.X] = 1.f;
 		float depth = m_WaterHeight - terrain->GetVertexGroundLevel(point.X, point.Y);
 		if (depth > 0.f && depth < 2.f)
 			m_WindStrength[point.Y * m_MapSize + point.X] = depth / 2.f;
 		point.windStrength = m_WindStrength[point.Y * m_MapSize + point.X];
 
 		// Replay the movement pattern over and over until the point leaves the map.
 		bool onMap = true;
 		while (onMap)
 			for (size_t step = 0; step < movement.size(); ++step)
 			{
 				// Move wind speed towards the mean.
 				point.windStrength = 0.15f + point.windStrength * 0.85f;
 
 				// Adjust speed based on height difference, a positive height difference slowly increases speed (simulate venturi effect)
 				// and a lower height reduces speed (wind protection from hills/...)
 				// Heights are clamped from below to the water level. The next cell is
 				// sampled before the bounds check further down.
 				// NOTE(review): this relies on GetVertexGroundLevel accepting
 				// coordinates one step off the map - confirm.
 				float heightDiff = std::max(m_WaterHeight, terrain->GetVertexGroundLevel(point.X + movement[step].first, point.Y + movement[step].second)) -
 					std::max(m_WaterHeight, terrain->GetVertexGroundLevel(point.X, point.Y));
 				if (heightDiff > 0.f)
 					point.windStrength = std::min(2.f, point.windStrength + std::min(4.f, heightDiff) / 40.f);
 				else
 					point.windStrength = std::max(0.f, point.windStrength + std::max(-4.f, heightDiff) / 5.f);
 
 				point.X += movement[step].first;
 				point.Y += movement[step].second;
 
 				// Stop this point as soon as it steps off the map.
 				if (point.X < 0 || point.X >= static_cast<ssize_t>(m_MapSize) || point.Y < 0 || point.Y >= static_cast<ssize_t>(m_MapSize))
 				{
 					onMap = false;
 					break;
 				}
 				m_WindStrength[point.Y * m_MapSize + point.X] = point.windStrength;
 			}
 	}
 	// TODO: should perhaps blur a little, or change the above code to incorporate neighboring tiles a bit.
 }
 
 ////////////////////////////////////////////////////////////////////////
 // TODO: This will always recalculate for now
 void WaterManager::SetMapSize(size_t size)
 {
 	// TODO: the size is taken on trust; nothing validates it here.
 	m_MapSize = size;
 
 	// Throw away the per-cell caches that were sized for the old map.
 	m_DistanceHeightmap.reset();
 	m_WindStrength.reset();
 
 	// Flag the whole map, [0, size) on both axes, as needing an info update.
 	m_updatei0 = 0;
 	m_updatej0 = 0;
 	m_updatei1 = size;
 	m_updatej1 = size;
 	m_NeedInfoUpdate = true;
 }
 
 ////////////////////////////////////////////////////////////////////////
 // This will set the bools properly
 void WaterManager::UpdateQuality()
 {
 	if (g_RenderingOptions.GetWaterEffects() != m_WaterEffects)
 	{
 		m_WaterEffects = g_RenderingOptions.GetWaterEffects();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterFancyEffects() != m_WaterFancyEffects)
 	{
 		m_WaterFancyEffects = g_RenderingOptions.GetWaterFancyEffects();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterRealDepth() != m_WaterRealDepth)
 	{
 		m_WaterRealDepth = g_RenderingOptions.GetWaterRealDepth();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterRefraction() != m_WaterRefraction)
 	{
 		m_WaterRefraction = g_RenderingOptions.GetWaterRefraction();
 		m_NeedsReloading = true;
 	}
 	if (g_RenderingOptions.GetWaterReflection() != m_WaterReflection)
 	{
 		m_WaterReflection = g_RenderingOptions.GetWaterReflection();
 		m_NeedsReloading = true;
 	}
 }
 
 // Fancy water needs water rendering to be on, a backend other than GL_ARB,
 // and the water effects option to be enabled.
 bool WaterManager::WillRenderFancyWater() const
 {
 	if (!m_RenderWater)
 		return false;
 
 	if (g_VideoMode.GetBackend() == CVideoMode::Backend::GL_ARB)
 		return false;
 
 	return g_RenderingOptions.GetWaterEffects();
 }
 
 // Index into m_WaterTexture of the animation frame for the current water
 // timer, given that the whole texture set is cycled through once per `period`.
 // `period` must be positive.
 size_t WaterManager::GetCurrentTextureIndex(const double& period) const
 {
 	ENSURE(period > 0.0);
 
 	const size_t textureCount = ARRAY_SIZE(m_WaterTexture);
 	// Number of frames elapsed so far; the fractional part is dropped below.
 	const auto elapsedFrames = m_WaterTexTimer * textureCount / period;
 	return static_cast<size_t>(elapsedFrames) % textureCount;
 }
 
 // Index of the frame following the current one, wrapping around at the end
 // of m_WaterTexture. `period` must be positive.
 size_t WaterManager::GetNextTextureIndex(const double& period) const
 {
 	ENSURE(period > 0.0);
 
 	const size_t following = GetCurrentTextureIndex(period) + 1;
 	return following % ARRAY_SIZE(m_WaterTexture);
 }
Index: ps/trunk/source/renderer/backend/gl/DeviceCommandContext.h
===================================================================
--- ps/trunk/source/renderer/backend/gl/DeviceCommandContext.h	(revision 26524)
+++ ps/trunk/source/renderer/backend/gl/DeviceCommandContext.h	(revision 26525)
@@ -1,164 +1,178 @@
 /* Copyright (C) 2022 Wildfire Games.
  * This file is part of 0 A.D.
  *
  * 0 A.D. is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * 0 A.D. is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef INCLUDED_RENDERER_GL_DEVICECOMMANDCONTEXT
 #define INCLUDED_RENDERER_GL_DEVICECOMMANDCONTEXT
 
 #include "lib/ogl.h"
 #include "renderer/backend/Format.h"
 #include "renderer/backend/gl/Buffer.h"
 #include "renderer/backend/PipelineState.h"
 
 #include <array>
 #include <cstdint>
 #include <functional>
 #include <memory>
 #include <optional>
 #include <utility>
 
 namespace Renderer
 {
 
 namespace Backend
 {
 
 namespace GL
 {
 
 class CDevice;
 class CFramebuffer;
 class CTexture;
 
 class CDeviceCommandContext // constructor and Create() are private; CDevice is the only friend
 {
 public:
 	~CDeviceCommandContext();
 
 	CDevice* GetDevice() { return m_Device; } // returns the stored device pointer
 
 	void SetGraphicsPipelineState(const GraphicsPipelineStateDesc& pipelineStateDesc);
 
 	void BlitFramebuffer(CFramebuffer* destinationFramebuffer, CFramebuffer* sourceFramebuffer);
 
 	void ClearFramebuffer();
 	void ClearFramebuffer(const bool color, const bool depth, const bool stencil);
 	void SetFramebuffer(CFramebuffer* framebuffer);
 
 	void UploadTexture(CTexture* texture, const Format dataFormat,
 		const void* data, const size_t dataSize,
 		const uint32_t level = 0, const uint32_t layer = 0);
 	void UploadTextureRegion(CTexture* texture, const Format dataFormat,
 		const void* data, const size_t dataSize,
 		const uint32_t xOffset, const uint32_t yOffset,
 		const uint32_t width, const uint32_t height,
 		const uint32_t level = 0, const uint32_t layer = 0);
 
 	using UploadBufferFunction = std::function<void(u8*)>; // callback receives a u8* - presumably the destination to fill; confirm in the .cpp
 	void UploadBuffer(CBuffer* buffer, const void* data, const uint32_t dataSize);
 	void UploadBuffer(CBuffer* buffer, const UploadBufferFunction& uploadFunction);
 	void UploadBufferRegion(
 		CBuffer* buffer, const void* data, const uint32_t dataOffset, const uint32_t dataSize);
 	void UploadBufferRegion(
 		CBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
 		const UploadBufferFunction& uploadFunction);
 
 	// TODO: maybe we should add a more common type, like CRectI.
 	struct Rect
 	{
 		int32_t x, y;
 		int32_t width, height;
 	};
 	void SetScissors(const uint32_t scissorCount, const Rect* scissors); // presumably scissors points to scissorCount entries; confirm
 	void SetViewports(const uint32_t viewportCount, const Rect* viewports);
 
+	void SetIndexBuffer(CBuffer* buffer);
+	void SetIndexBufferData(const void* data);
+
+	void Draw(const uint32_t firstVertex, const uint32_t vertexCount);
+	void DrawIndexed(
+		const uint32_t firstIndex, const uint32_t indexCount, const int32_t vertexOffset);
+	// TODO: should be removed when performance impact is minimal on slow hardware.
+	void DrawIndexedInRange(
+		const uint32_t firstIndex, const uint32_t indexCount,
+		const uint32_t start, const uint32_t end);
+
 	void BeginScopedLabel(const char* name);
 	void EndScopedLabel();
 
 	// TODO: remove direct binding after moving shaders.
 	void BindTexture(const uint32_t unit, const GLenum target, const GLuint handle);
 	void BindBuffer(const CBuffer::Type type, CBuffer* buffer);
 
 	void Flush();
 
 private:
 	friend class CDevice;
 
 	static std::unique_ptr<CDeviceCommandContext> Create(CDevice* device); // private factory; caller owns the returned context
 
 	CDeviceCommandContext(CDevice* device);
 
 	void ResetStates();
 
 	void SetGraphicsPipelineStateImpl(
 		const GraphicsPipelineStateDesc& pipelineStateDesc, const bool force);
 
 	CDevice* m_Device = nullptr;
 
 	GraphicsPipelineStateDesc m_GraphicsPipelineStateDesc{};
 	CFramebuffer* m_Framebuffer = nullptr;
 	uint32_t m_ScissorCount = 0;
 	// GL2.1 doesn't support more than 1 scissor.
 	std::array<Rect, 1> m_Scissors;
 
 	uint32_t m_ScopedLabelDepth = 0;
 
+	CBuffer* m_IndexBuffer = nullptr;
+	const void* m_IndexBufferData = nullptr;
+
 	uint32_t m_ActiveTextureUnit = 0;
 	using BindUnit = std::pair<GLenum, GLuint>; // presumably (target, handle), as in BindTexture's parameters; confirm
 	std::array<BindUnit, 16> m_BoundTextures; // 16 slots - presumably one per texture unit; confirm
 	class ScopedBind // stores the previous BindUnit - presumably restored by the destructor; confirm in the .cpp
 	{
 	public:
 		ScopedBind(CDeviceCommandContext* deviceCommandContext,
 			const GLenum target, const GLuint handle);
 
 		~ScopedBind();
 	private:
 		CDeviceCommandContext* m_DeviceCommandContext = nullptr;
 		BindUnit m_OldBindUnit;
 	};
 };
 
 } // namespace GL
 
 } // namespace Backend
 
 } // namespace Renderer
 
 #define GPU_SCOPED_LABEL(deviceCommandContext, name) \
 	GPUScopedLabel scopedLabel((deviceCommandContext), (name)); // fixed variable name: a second use in the same scope would redeclare it
 
 class GPUScopedLabel // calls BeginScopedLabel on construction and EndScopedLabel on destruction
 {
 public:
 	GPUScopedLabel(
 		Renderer::Backend::GL::CDeviceCommandContext* deviceCommandContext,
 		const char* name)
 		: m_DeviceCommandContext(deviceCommandContext)
 	{
 		m_DeviceCommandContext->BeginScopedLabel(name); // dereferenced unconditionally: deviceCommandContext must not be null
 	}
 
 	~GPUScopedLabel()
 	{
 		m_DeviceCommandContext->EndScopedLabel(); // NOTE(review): copy operations are not deleted, so a copy would call this a second time
 	}
 
 private:
 	Renderer::Backend::GL::CDeviceCommandContext* m_DeviceCommandContext = nullptr;
 };
 
 #endif // INCLUDED_RENDERER_GL_DEVICECOMMANDCONTEXT