Index: ps/trunk/source/renderer/backend/IDevice.h
===================================================================
--- ps/trunk/source/renderer/backend/IDevice.h (revision 28009)
+++ ps/trunk/source/renderer/backend/IDevice.h (revision 28010)
@@ -1,180 +1,187 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_RENDERER_BACKEND_IDEVICE
#define INCLUDED_RENDERER_BACKEND_IDEVICE
#include "graphics/Color.h"
#include "ps/containers/Span.h"
#include "renderer/backend/Backend.h"
#include "renderer/backend/Format.h"
#include "renderer/backend/IBuffer.h"
#include "renderer/backend/IDevice.h"
#include "renderer/backend/IDeviceCommandContext.h"
#include "renderer/backend/IFramebuffer.h"
#include "renderer/backend/IShaderProgram.h"
#include "renderer/backend/ITexture.h"
#include "renderer/backend/PipelineState.h"
#include "scriptinterface/ScriptForward.h"
#include
#include
#include
class CShaderDefines;
class CStr;
namespace Renderer
{
namespace Backend
{
/**
* Abstract interface of a rendering device. Every member function except the
* destructor is pure virtual, so an implementation has to provide all of them.
*/
class IDevice
{
public:
/**
* Feature flags and limits of a device, returned by GetCapabilities().
*/
struct Capabilities
{
bool S3TC;
bool ARBShaders;
bool ARBShadersShadow;
bool computeShaders;
bool debugLabels;
bool debugScopedLabels;
bool multisampling;
bool anisotropicFiltering;
uint32_t maxSampleCount;
float maxAnisotropy;
uint32_t maxTextureSize;
bool instancing;
};
virtual ~IDevice() {}
virtual Backend GetBackend() const = 0;
virtual const std::string& GetName() const = 0;
virtual const std::string& GetVersion() const = 0;
virtual const std::string& GetDriverInformation() const = 0;
virtual const std::vector& GetExtensions() const = 0;
virtual void Report(const ScriptRequest& rq, JS::HandleValue settings) = 0;
virtual std::unique_ptr CreateCommandContext() = 0;
/**
* Creates a graphics pipeline state. It's a caller responsibility to
* guarantee a lifespan of IShaderProgram stored in the description.
*/
virtual std::unique_ptr CreateGraphicsPipelineState(
const SGraphicsPipelineStateDesc& pipelineStateDesc) = 0;
/**
+ * Creates a compute pipeline state. It's a caller responsibility to
+ * guarantee a lifespan of IShaderProgram stored in the description.
+ */
+ virtual std::unique_ptr CreateComputePipelineState(
+ const SComputePipelineStateDesc& pipelineStateDesc) = 0;
+
+ /**
* Creates a vertex input layout. It's recommended to use as few different
* layouts as possible.
*/
virtual std::unique_ptr CreateVertexInputLayout(
const PS::span attributes) = 0;
virtual std::unique_ptr CreateTexture(
const char* name, const ITexture::Type type, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount, const uint32_t sampleCount) = 0;
/**
* Takes the same parameters as CreateTexture except the texture type.
* MIPLevelCount and sampleCount default to 1.
*/
virtual std::unique_ptr CreateTexture2D(
const char* name, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount = 1, const uint32_t sampleCount = 1) = 0;
/**
* @see IFramebuffer
*
* The color attachment and the depth-stencil attachment should not be
* nullptr at the same time. There should not be many different clear
* colors along all color attachments for all framebuffers created for
* the device.
*
* @return A valid framebuffer if it was created successfully else nullptr.
*/
virtual std::unique_ptr CreateFramebuffer(
const char* name, SColorAttachment* colorAttachment,
SDepthStencilAttachment* depthStencilAttachment) = 0;
virtual std::unique_ptr CreateBuffer(
const char* name, const IBuffer::Type type, const uint32_t size, const bool dynamic) = 0;
virtual std::unique_ptr CreateShaderProgram(
const CStr& name, const CShaderDefines& defines) = 0;
/**
* Acquires a backbuffer for rendering a frame.
*
* @return True if it was successfully acquired and we can render to it.
*/
virtual bool AcquireNextBackbuffer() = 0;
/**
* Returns a framebuffer for the current backbuffer with the required
* attachment operations. It should not be called if the last
* AcquireNextBackbuffer call returned false.
*
* It's guaranteed that for the same acquired backbuffer this function returns
* a framebuffer with the same attachments and properties except load and
* store operations.
*
* @return The last successfully acquired framebuffer that wasn't
* presented.
*/
virtual IFramebuffer* GetCurrentBackbuffer(
const AttachmentLoadOp colorAttachmentLoadOp,
const AttachmentStoreOp colorAttachmentStoreOp,
const AttachmentLoadOp depthStencilAttachmentLoadOp,
const AttachmentStoreOp depthStencilAttachmentStoreOp) = 0;
/**
* Presents the backbuffer to the swapchain queue to be flipped on a
* screen. Should be called only if the last AcquireNextBackbuffer call
* returned true.
*/
virtual void Present() = 0;
/**
* Should be called on window surface resize. It's the device owner
* responsibility to call that function. Shouldn't be called during
* rendering to an acquired backbuffer.
*/
virtual void OnWindowResize(const uint32_t width, const uint32_t height) = 0;
virtual bool IsTextureFormatSupported(const Format format) const = 0;
virtual bool IsFramebufferFormatSupported(const Format format) const = 0;
/**
* Returns the most suitable format for the usage. Returns
* Format::UNDEFINED if there is no such format.
*/
virtual Format GetPreferredDepthStencilFormat(
const uint32_t usage, const bool depth, const bool stencil) const = 0;
virtual const Capabilities& GetCapabilities() const = 0;
};
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_IDEVICE
Index: ps/trunk/source/graphics/ShaderManager.cpp
===================================================================
--- ps/trunk/source/graphics/ShaderManager.cpp (revision 28009)
+++ ps/trunk/source/graphics/ShaderManager.cpp (revision 28010)
@@ -1,499 +1,522 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#include "ShaderManager.h"
#include "graphics/PreprocessorWrapper.h"
#include "graphics/ShaderTechnique.h"
#include "lib/config2.h"
#include "lib/hash.h"
#include "lib/timer.h"
#include "lib/utf8.h"
#include "ps/CLogger.h"
#include "ps/CStrIntern.h"
#include "ps/CStrInternStatic.h"
#include "ps/Filesystem.h"
#include "ps/Profile.h"
#include "ps/XML/Xeromyces.h"
#include "renderer/backend/IDevice.h"
#define USE_SHADER_XML_VALIDATION 1
#if USE_SHADER_XML_VALIDATION
#include "ps/XML/RelaxNG.h"
#include "ps/XML/XMLWriter.h"
#endif
#include
#include
TIMER_ADD_CLIENT(tc_ShaderValidation);
/**
 * Stores the device, registers the shader XML validator (when validation is
 * compiled in) and subscribes to file reload notifications.
 */
CShaderManager::CShaderManager(Renderer::Backend::IDevice* device)
	: m_Device(device)
{
#if USE_SHADER_XML_VALIDATION
	{
		// The extra scope limits what the validation timer accounts for.
		TIMER_ACCRUE(tc_ShaderValidation);
		const bool validatorAdded =
			CXeromyces::AddValidator(g_VFS, "shader", "shaders/program.rng");
		if (!validatorAdded)
			LOGERROR("CShaderManager: failed to load grammar shaders/program.rng");
	}
#endif

	// Allow hotloading of shaders: the callback is invoked with the path of
	// a changed file.
	RegisterFileReloadFunc(ReloadChangedFileCB, this);
}
/**
* Removes the file reload callback that the constructor registered for this
* instance.
*/
CShaderManager::~CShaderManager()
{
UnregisterFileReloadFunc(ReloadChangedFileCB, this);
}
/**
 * Returns the shader program for the given name and defines, creating it on
 * the first request. The result is cached per (name, defines) pair; a failed
 * creation is cached as well, so it is logged only once.
 */
CShaderProgramPtr CShaderManager::LoadProgram(const CStr& name, const CShaderDefines& defines)
{
	CacheKey key = { name, defines };
	if (const auto cached = m_ProgramCache.find(key); cached != m_ProgramCache.end())
		return cached->second;

	CShaderProgramPtr program = CShaderProgram::Create(m_Device, name, defines);
	if (!program)
	{
		LOGERROR("Failed to load shader '%s'", name);
	}
	else
	{
		// Remember every file the program depends on.
		for (const VfsPath& dependency : program->GetFileDependencies())
			AddProgramFileDependency(program, dependency);
	}

	m_ProgramCache[key] = program;
	return program;
}
/**
 * Hashes an effect cache key by combining the hash of its name with the hash
 * of its defines, in that order.
 */
size_t CShaderManager::EffectCacheKeyHash::operator()(const EffectCacheKey& key) const
{
	size_t seed = 0;
	const auto nameHash = key.name.GetHash();
	hash_combine(seed, nameHash);
	const auto definesHash = key.defines.GetHash();
	hash_combine(seed, definesHash);
	return seed;
}
/**
 * Two effect cache keys are equal when both their names and their defines
 * are equal. Defines are compared only if the names match.
 */
bool CShaderManager::EffectCacheKey::operator==(const EffectCacheKey& b) const
{
	if (!(name == b.name))
		return false;
	return defines == b.defines;
}
/**
 * Loads an effect by name with a default-constructed set of defines.
 */
CShaderTechniquePtr CShaderManager::LoadEffect(CStrIntern name)
{
	const CShaderDefines emptyDefines = CShaderDefines();
	return LoadEffect(name, emptyDefines);
}
/**
* Loads the effect "shaders/effects/<name>.xml" with the given defines and no
* pipeline state callback. Results are cached per (name, defines) pair.
*
* @return The technique, or an empty pointer if loading failed. The empty
* pointer is cached too, so a failing effect is not loaded again.
*/
CShaderTechniquePtr CShaderManager::LoadEffect(CStrIntern name, const CShaderDefines& defines)
{
// Return the cached effect, if there is one
EffectCacheKey key = { name, defines };
EffectCacheMap::iterator it = m_EffectCache.find(key);
if (it != m_EffectCache.end())
return it->second;
// First time we've seen this key, so construct a new effect:
const VfsPath xmlFilename = L"shaders/effects/" + wstring_from_utf8(name.string()) + L".xml";
CShaderTechniquePtr tech = std::make_shared(
xmlFilename, defines, PipelineStateDescCallback{});
if (!LoadTechnique(tech))
{
LOGERROR("Failed to load effect '%s'", name.c_str());
// Store an empty pointer for this key instead of a half-loaded technique.
tech = CShaderTechniquePtr();
}
m_EffectCache[key] = tech;
return tech;
}
/**
* Loads the effect "shaders/effects/<name>.xml" with the given defines and a
* pipeline state callback. Unlike the other overloads the result is never
* stored in the effect cache, so every call builds a new technique.
*
* @return The technique, or an empty pointer if loading failed.
*/
CShaderTechniquePtr CShaderManager::LoadEffect(
CStrIntern name, const CShaderDefines& defines, const PipelineStateDescCallback& callback)
{
// We don't cache techniques with callbacks.
const VfsPath xmlFilename = L"shaders/effects/" + wstring_from_utf8(name.string()) + L".xml";
CShaderTechniquePtr technique = std::make_shared(xmlFilename, defines, callback);
if (!LoadTechnique(technique))
{
LOGERROR("Failed to load effect '%s'", name.c_str());
return {};
}
return technique;
}
/**
* (Re)loads a shader technique from the XML file returned by tech->GetPath().
*
* The technique's own path is registered as a file dependency first, so it is
* tracked even when loading fails. For the dummy backend a single pass using
* the dummy shader is created and the XML content is not inspected. Otherwise
* the first child of the root element whose "require" children are all
* satisfied is used: its "define" and "sort_by_distance" children are applied
* first, then each "pass" child becomes a graphics pipeline state and each
* "compute" child sets the compute pipeline state.
*
* @param tech Technique to fill in.
* @return False if the XML file can't be loaded or no usable technique is
* found, true otherwise.
*/
bool CShaderManager::LoadTechnique(CShaderTechniquePtr& tech)
{
PROFILE2("loading technique");
PROFILE2_ATTR("name: %s", tech->GetPath().string8().c_str());
AddTechniqueFileDependency(tech, tech->GetPath());
CXeromyces XeroFile;
PSRETURN ret = XeroFile.Load(g_VFS, tech->GetPath());
if (ret != PSRETURN_OK)
return false;
// By default we assume that we have techniques for every dummy shader.
if (m_Device->GetBackend() == Renderer::Backend::Backend::DUMMY)
{
CShaderProgramPtr shaderProgram = LoadProgram(str_dummy.string(), tech->GetShaderDefines());
std::vector techPasses;
Renderer::Backend::SGraphicsPipelineStateDesc passPipelineStateDesc =
Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc();
passPipelineStateDesc.shaderProgram = shaderProgram->GetBackendShaderProgram();
techPasses.emplace_back(
m_Device->CreateGraphicsPipelineState(passPipelineStateDesc), shaderProgram);
tech->SetPasses(std::move(techPasses));
return true;
}
// Define all the elements and attributes used in the XML file
#define EL(x) int el_##x = XeroFile.GetElementID(#x)
#define AT(x) int at_##x = XeroFile.GetAttributeID(#x)
EL(blend);
EL(color);
+ EL(compute);
EL(cull);
EL(define);
EL(depth);
EL(pass);
EL(polygon);
EL(require);
EL(sort_by_distance);
EL(stencil);
AT(compare);
AT(constant);
AT(context);
AT(depth_fail);
AT(dst);
AT(fail);
AT(front_face);
AT(func);
AT(mask);
AT(mask_read);
AT(mask_red);
AT(mask_green);
AT(mask_blue);
AT(mask_alpha);
AT(mode);
AT(name);
AT(op);
AT(pass);
AT(reference);
AT(shader);
AT(shaders);
AT(src);
AT(test);
AT(value);
#undef AT
#undef EL
// Prepare the preprocessor for conditional tests
CPreprocessorWrapper preprocessor;
preprocessor.AddDefines(tech->GetShaderDefines());
XMBElement root = XeroFile.GetRoot();
// Find the first technique that we can use; the search stops as soon as
// one passes all of its requirements.
std::optional usableTech;
XERO_ITER_EL(root, technique)
{
bool isUsable = true;
XERO_ITER_EL(technique, child)
{
XMBAttributeList attrs = child.GetAttributes();
// TODO: require should be an attribute of the tech and not its child.
if (child.GetNodeName() == el_require)
{
if (attrs.GetNamedItem(at_shaders) == "arb")
{
if (m_Device->GetBackend() != Renderer::Backend::Backend::GL_ARB ||
!m_Device->GetCapabilities().ARBShaders)
{
isUsable = false;
}
}
else if (attrs.GetNamedItem(at_shaders) == "glsl")
{
if (m_Device->GetBackend() != Renderer::Backend::Backend::GL)
isUsable = false;
}
else if (attrs.GetNamedItem(at_shaders) == "spirv")
{
if (m_Device->GetBackend() != Renderer::Backend::Backend::VULKAN)
isUsable = false;
}
else if (!attrs.GetNamedItem(at_context).empty())
{
CStr cond = attrs.GetNamedItem(at_context);
if (!preprocessor.TestConditional(cond))
isUsable = false;
}
}
}
if (isUsable)
{
usableTech.emplace(technique);
break;
}
}
if (!usableTech.has_value())
{
debug_warn(L"Can't find a usable technique");
return false;
}
tech->SetSortByDistance(false);
// Loads a shader program and registers its file dependencies as
// dependencies of the technique. Returns the program, which might be empty.
+ const auto loadShaderProgramForTech = [&](const CStr& name, const CShaderDefines& defines)
+ {
+ CShaderProgramPtr shaderProgram = LoadProgram(name.c_str(), defines);
+ if (shaderProgram)
+ {
+ for (const VfsPath& shaderProgramPath : shaderProgram->GetFileDependencies())
+ AddTechniqueFileDependency(tech, shaderProgramPath);
+ }
+ return shaderProgram;
+ };
+
CShaderDefines techDefines = tech->GetShaderDefines();
XERO_ITER_EL((*usableTech), Child)
{
if (Child.GetNodeName() == el_define)
{
techDefines.Add(CStrIntern(Child.GetAttributes().GetNamedItem(at_name)), CStrIntern(Child.GetAttributes().GetNamedItem(at_value)));
}
else if (Child.GetNodeName() == el_sort_by_distance)
{
tech->SetSortByDistance(true);
}
}
// We don't want to have a shader context depending on the order of define and
// pass tags.
// TODO: we might want to implement that in a proper way via splitting passes
// and tags in different groups in XML.
std::vector techPasses;
XERO_ITER_EL((*usableTech), Child)
{
if (Child.GetNodeName() == el_pass)
{
CShaderDefines passDefines = techDefines;
Renderer::Backend::SGraphicsPipelineStateDesc passPipelineStateDesc =
Renderer::Backend::MakeDefaultGraphicsPipelineStateDesc();
XERO_ITER_EL(Child, Element)
{
if (Element.GetNodeName() == el_define)
{
passDefines.Add(CStrIntern(Element.GetAttributes().GetNamedItem(at_name)), CStrIntern(Element.GetAttributes().GetNamedItem(at_value)));
}
else if (Element.GetNodeName() == el_blend)
{
passPipelineStateDesc.blendState.enabled = true;
passPipelineStateDesc.blendState.srcColorBlendFactor = passPipelineStateDesc.blendState.srcAlphaBlendFactor =
Renderer::Backend::ParseBlendFactor(Element.GetAttributes().GetNamedItem(at_src));
passPipelineStateDesc.blendState.dstColorBlendFactor = passPipelineStateDesc.blendState.dstAlphaBlendFactor =
Renderer::Backend::ParseBlendFactor(Element.GetAttributes().GetNamedItem(at_dst));
if (!Element.GetAttributes().GetNamedItem(at_op).empty())
{
passPipelineStateDesc.blendState.colorBlendOp = passPipelineStateDesc.blendState.alphaBlendOp =
Renderer::Backend::ParseBlendOp(Element.GetAttributes().GetNamedItem(at_op));
}
if (!Element.GetAttributes().GetNamedItem(at_constant).empty())
{
if (!passPipelineStateDesc.blendState.constant.ParseString(
Element.GetAttributes().GetNamedItem(at_constant)))
{
LOGERROR("Failed to parse blend constant: %s",
Element.GetAttributes().GetNamedItem(at_constant).c_str());
}
}
}
else if (Element.GetNodeName() == el_color)
{
// Start from an empty mask and enable only the channels set to "TRUE".
passPipelineStateDesc.blendState.colorWriteMask = 0;
#define MASK_CHANNEL(ATTRIBUTE, VALUE) \
if (Element.GetAttributes().GetNamedItem(ATTRIBUTE) == "TRUE") \
passPipelineStateDesc.blendState.colorWriteMask |= Renderer::Backend::ColorWriteMask::VALUE
MASK_CHANNEL(at_mask_red, RED);
MASK_CHANNEL(at_mask_green, GREEN);
MASK_CHANNEL(at_mask_blue, BLUE);
MASK_CHANNEL(at_mask_alpha, ALPHA);
#undef MASK_CHANNEL
}
else if (Element.GetNodeName() == el_cull)
{
if (!Element.GetAttributes().GetNamedItem(at_mode).empty())
{
passPipelineStateDesc.rasterizationState.cullMode =
Renderer::Backend::ParseCullMode(Element.GetAttributes().GetNamedItem(at_mode));
}
if (!Element.GetAttributes().GetNamedItem(at_front_face).empty())
{
passPipelineStateDesc.rasterizationState.frontFace =
Renderer::Backend::ParseFrontFace(Element.GetAttributes().GetNamedItem(at_front_face));
}
}
else if (Element.GetNodeName() == el_depth)
{
if (!Element.GetAttributes().GetNamedItem(at_test).empty())
{
passPipelineStateDesc.depthStencilState.depthTestEnabled =
Element.GetAttributes().GetNamedItem(at_test) == "TRUE";
}
if (!Element.GetAttributes().GetNamedItem(at_func).empty())
{
passPipelineStateDesc.depthStencilState.depthCompareOp =
Renderer::Backend::ParseCompareOp(Element.GetAttributes().GetNamedItem(at_func));
}
if (!Element.GetAttributes().GetNamedItem(at_mask).empty())
{
// NOTE(review): this compares against lowercase "true" while the
// other boolean attributes here compare against "TRUE" - confirm
// that it matches the effect XML files.
passPipelineStateDesc.depthStencilState.depthWriteEnabled =
Element.GetAttributes().GetNamedItem(at_mask) == "true";
}
}
else if (Element.GetNodeName() == el_polygon)
{
if (!Element.GetAttributes().GetNamedItem(at_mode).empty())
{
passPipelineStateDesc.rasterizationState.polygonMode =
Renderer::Backend::ParsePolygonMode(Element.GetAttributes().GetNamedItem(at_mode));
}
}
else if (Element.GetNodeName() == el_stencil)
{
if (!Element.GetAttributes().GetNamedItem(at_test).empty())
{
passPipelineStateDesc.depthStencilState.stencilTestEnabled =
Element.GetAttributes().GetNamedItem(at_test) == "TRUE";
}
if (!Element.GetAttributes().GetNamedItem(at_reference).empty())
{
passPipelineStateDesc.depthStencilState.stencilReference =
Element.GetAttributes().GetNamedItem(at_reference).ToULong();
}
if (!Element.GetAttributes().GetNamedItem(at_mask_read).empty())
{
passPipelineStateDesc.depthStencilState.stencilReadMask =
Element.GetAttributes().GetNamedItem(at_mask_read).ToULong();
}
if (!Element.GetAttributes().GetNamedItem(at_mask).empty())
{
passPipelineStateDesc.depthStencilState.stencilWriteMask =
Element.GetAttributes().GetNamedItem(at_mask).ToULong();
}
// The operations below are applied to both the front and the back face.
if (!Element.GetAttributes().GetNamedItem(at_compare).empty())
{
passPipelineStateDesc.depthStencilState.stencilFrontFace.compareOp =
passPipelineStateDesc.depthStencilState.stencilBackFace.compareOp =
Renderer::Backend::ParseCompareOp(Element.GetAttributes().GetNamedItem(at_compare));
}
if (!Element.GetAttributes().GetNamedItem(at_fail).empty())
{
passPipelineStateDesc.depthStencilState.stencilFrontFace.failOp =
passPipelineStateDesc.depthStencilState.stencilBackFace.failOp =
Renderer::Backend::ParseStencilOp(Element.GetAttributes().GetNamedItem(at_fail));
}
if (!Element.GetAttributes().GetNamedItem(at_pass).empty())
{
passPipelineStateDesc.depthStencilState.stencilFrontFace.passOp =
passPipelineStateDesc.depthStencilState.stencilBackFace.passOp =
Renderer::Backend::ParseStencilOp(Element.GetAttributes().GetNamedItem(at_pass));
}
if (!Element.GetAttributes().GetNamedItem(at_depth_fail).empty())
{
passPipelineStateDesc.depthStencilState.stencilFrontFace.depthFailOp =
passPipelineStateDesc.depthStencilState.stencilBackFace.depthFailOp =
Renderer::Backend::ParseStencilOp(Element.GetAttributes().GetNamedItem(at_depth_fail));
}
}
}
// Load the shader program after we've read all the possibly-relevant <define>s.
CShaderProgramPtr shaderProgram =
- LoadProgram(Child.GetAttributes().GetNamedItem(at_shader).c_str(), passDefines);
+ loadShaderProgramForTech(Child.GetAttributes().GetNamedItem(at_shader), passDefines);
if (shaderProgram)
{
- for (const VfsPath& shaderProgramPath : shaderProgram->GetFileDependencies())
- AddTechniqueFileDependency(tech, shaderProgramPath);
// The callback, if any, adjusts the description before the shader
// program is assigned to it.
if (tech->GetPipelineStateDescCallback())
tech->GetPipelineStateDescCallback()(passPipelineStateDesc);
passPipelineStateDesc.shaderProgram = shaderProgram->GetBackendShaderProgram();
techPasses.emplace_back(
m_Device->CreateGraphicsPipelineState(passPipelineStateDesc), shaderProgram);
}
}
+ else if (Child.GetNodeName() == el_compute)
+ {
+ CShaderProgramPtr shaderProgram =
+ loadShaderProgramForTech(Child.GetAttributes().GetNamedItem(at_shader), techDefines);
+ if (shaderProgram)
+ {
+ Renderer::Backend::SComputePipelineStateDesc computePipelineStateDesc{};
+ computePipelineStateDesc.shaderProgram = shaderProgram->GetBackendShaderProgram();
+ tech->SetComputePipelineState(
+ m_Device->CreateComputePipelineState(computePipelineStateDesc), shaderProgram);
+ }
+ }
}
// Passes are replaced only when at least one graphics pass was created.
- tech->SetPasses(std::move(techPasses));
+ if (!techPasses.empty())
+ tech->SetPasses(std::move(techPasses));
return true;
}
/**
 * Returns the number of entries in the effect cache.
 */
size_t CShaderManager::GetNumEffectsLoaded() const
{
	const size_t cachedEffectCount = m_EffectCache.size();
	return cachedEffectCount;
}
/**
* File reload callback. Casts the opaque param back to the object it was
* registered with and forwards the changed path to its ReloadChangedFile.
*/
/*static*/ Status CShaderManager::ReloadChangedFileCB(void* param, const VfsPath& path)
{
return static_cast(param)->ReloadChangedFile(path);
}
/**
 * Reloads every shader program and then every shader technique that was
 * registered as depending on the given file. Entries whose object no longer
 * exists are skipped.
 *
 * @return Always INFO::OK; a technique that fails to reload is only logged.
 */
Status CShaderManager::ReloadChangedFile(const VfsPath& path)
{
	// Reload all shader programs using this file.
	if (const auto programsIt = m_HotloadPrograms.find(path); programsIt != m_HotloadPrograms.end())
	{
		for (const auto& weakProgram : programsIt->second)
		{
			if (const auto program = weakProgram.lock())
				program->Reload();
		}
	}

	// Reload all shader techniques using this file. This has to happen after
	// the shader programs were reloaded.
	if (const auto techniquesIt = m_HotloadTechniques.find(path); techniquesIt != m_HotloadTechniques.end())
	{
		for (const auto& weakTechnique : techniquesIt->second)
		{
			auto technique = weakTechnique.lock();
			if (!technique)
				continue;
			if (!LoadTechnique(technique))
				LOGERROR("Failed to reload technique '%s'", technique->GetPath().string8().c_str());
		}
	}

	return INFO::OK;
}
/**
 * Records that the technique depends on the file at the given path.
 */
void CShaderManager::AddTechniqueFileDependency(const CShaderTechniquePtr& technique, const VfsPath& path)
{
	// operator[] creates the entry for a path that is seen for the first time.
	auto& dependentTechniques = m_HotloadTechniques[path];
	dependentTechniques.insert(technique);
}
/**
 * Records that the shader program depends on the file at the given path.
 */
void CShaderManager::AddProgramFileDependency(const CShaderProgramPtr& program, const VfsPath& path)
{
	// operator[] creates the entry for a path that is seen for the first time.
	auto& dependentPrograms = m_HotloadPrograms[path];
	dependentPrograms.insert(program);
}
Index: ps/trunk/binaries/data/config/default.cfg
===================================================================
--- ps/trunk/binaries/data/config/default.cfg (revision 28009)
+++ ps/trunk/binaries/data/config/default.cfg (revision 28010)
@@ -1,604 +1,607 @@
; Global Configuration Settings
;
; **************************************************************
; * DO NOT EDIT THIS FILE if you want personal customisations: *
; * create a text file called "local.cfg" instead, and copy *
; * the lines from this file that you want to change. *
; * *
; * If a setting is part of a section (for instance [hotkey]) *
; * you need to append the section name at the beginning of *
; * your custom line (for instance you need to write *
; * "hotkey.pause = Space" if you want to change the pausing *
; * hotkey to the spacebar). *
; * *
; * On Linux, create: *
; * $XDG_CONFIG_HOME/0ad/config/local.cfg *
; * (Note: $XDG_CONFIG_HOME defaults to ~/.config) *
; * *
; * On OS X, create: *
; * ~/Library/Application\ Support/0ad/config/local.cfg *
; * *
; * On Windows, create: *
; * %appdata%\0ad\config\local.cfg *
; * *
; **************************************************************
; Enable/disable windowed mode by default. (Use Alt+Enter to toggle in the game.)
windowed = false
; Switches between real fullscreen and borderless window on a full display size.
borderless.fullscreen = true
; Hides a window border in the windowed mode.
borderless.window = false
; Constrain the mouse to the window boundaries in the fullscreen mode.
window.mousegrabinfullscreen = true
; The same but for the window mode.
window.mousegrabinwindowmode = false
; Show detailed tooltips (Unit stats)
showdetailedtooltips = false
; Pause the game on window focus loss (Only applicable to single player mode)
pauseonfocusloss = true
; Persist settings after leaving the game setup screen
persistmatchsettings = true
; Default player name to use in multiplayer
; playername = "anonymous"
; Default server name or IP to use in multiplayer
multiplayerserver = "127.0.0.1"
; Force a particular resolution. (If these are 0, the default is
; to keep the current desktop resolution in fullscreen mode or to
; use 1024x768 in windowed mode.)
xres = 0
yres = 0
; Force a non-standard bit depth (if 0 then use the current desktop bit depth)
bpp = 0
; Preferred display (for multidisplay setups, only works with SDL 2.0)
display = 0
; Enable Hi-DPI where supported, currently working only for testing.
hidpi = false
; Allows forcing a GL version for SDL
forceglversion = false
forceglprofile = "compatibility" ; Possible values: compatibility, core, es
forceglmajorversion = 3
forceglminorversion = 3
; Big screenshot tiles
screenshot.tiles = 8
screenshot.tilewidth = 480
screenshot.tileheight = 270
; Emulate right-click with Ctrl+Click on Mac mice
macmouse = false
; System settings:
; If false, actors won't be rendered but any other entity will be.
renderactors = true
watereffects=true ; When disabled, force usage of the fixed pipeline water. This is faster, but really, really ugly.
waterfancyeffects = false
waterrealdepth = true
waterrefraction = true
waterreflection = true
shadows = true
shadowquality = 0 ; Shadow map resolution. (-1 - Low, 0 - Medium, 1 - High, 2 - Very High)
; High values can crash the game when using a graphics card with low memory!
shadowpcf = true
; Increases detail closer to the camera but decreases performance,
; especially on low-end hardware.
shadowscascadecount = 1
shadowscascadedistanceratio = 1.7
; Hides shadows after the distance.
shadowscutoffdistance = 300.0
; If true shadows cover the whole map instead of the camera frustum.
shadowscovermap = false
+renderer.scale = 1.0
+renderer.upscale.technique = "fsr"
+
vsync = false
particles = true
fog = true
silhouettes = true
showsky = true
; Uses a synchronized call to the GL driver to get the error state. Useful
; for debugging a system without GL_KHR_debug.
gl.checkerrorafterswap = false
; Different ways to draw a cursor, possible values are "sdl" and "system".
; The "system" one doesn't support a visual change of the cursor.
cursorbackend = "sdl"
; Backends for all graphics rendering:
; glarb - GL with legacy assembler-like shaders, might be used only for buggy drivers.
; gl - GL with GLSL shaders, should be used by default.
; dummy - backend that does nothing, allows to check performance without backend drivers.
; vulkan - Vulkan with SPIR-V shaders.
rendererbackend = "gl"
; Enables additional debug information in renderer backend.
renderer.backend.debugcontext = false
renderer.backend.debugmessages = false
renderer.backend.debuglabels = false
renderer.backend.debugscopedlabels = false
renderer.backend.gl.enableframebufferinvalidating = false
renderer.backend.vulkan.disabledescriptorindexing = false
renderer.backend.vulkan.deviceindexoverride = -1
renderer.backend.vulkan.debugbarrierafterframebufferpass = false
renderer.backend.vulkan.debugwaitidlebeforeacquire = false
renderer.backend.vulkan.debugwaitidlebeforepresent = false
renderer.backend.vulkan.debugwaitidleafterpresent = false
; Should not be edited. It's used only to prevent running the fixed pipeline.
renderpath = default
; (0 - low, 1 - medium, 2 - high), higher quality means worse performance.
textures.quality = 2
; (1, 2, 4, 8 and 16)
textures.maxanisotropy = 2
;;;;; EXPERIMENTAL ;;;;;
; Experimental probably-non-working GPU skinning support; requires GLSL; use at own risk
gpuskinning = false
; Use smooth LOS interpolation
smoothlos = true
; Use screen-space postprocessing filters (HDR, bloom, DOF, etc). Incompatible with fixed renderpath.
postproc = true
; Use anti-aliasing techniques.
antialiasing = "disabled"
; Use sharpening techniques.
sharpening = "disabled"
sharpness = 0.3
; Quality used for actors.
max_actor_quality=200
; Whether or not actor variants are selected randomly, possible values are "full", "limited", "none".
variant_diversity = "full"
; Quality level of shader effects (set to 10 to display all effects)
materialmgr.quality = 10.0
;;;;;;;;;;;;;;;;;;;;;;;;
[adaptivefps]
session = 60 ; Throttle FPS in running games (prevents 100% CPU workload).
menu = 60 ; Throttle FPS in menus only.
[profiler2]
server = "127.0.0.1"
server.port = "8000" ; Use a free port on your machine.
server.threads = "6" ; Enough for the browser's parallel connection limit
[hotkey]
; Each one of the specified keys will trigger the action on the left
; for multiple-key combinations, separate keys with '+'.
; See keys.txt for the list of key names.
; > SYSTEM SETTINGS
exit = "" ; 'Custom' exit to desktop, SDL handles the native command via SDL_Quit.
cancel = Escape ; Close or cancel the current dialog box/popup
confirm = Return ; Confirm the current command
pause = Pause, "Shift+Space" ; Pause/unpause game
screenshot = F2 ; Take PNG screenshot
bigscreenshot = "Shift+F2" ; Take large BMP screenshot
togglefullscreen = "Alt+Return" ; Toggle fullscreen/windowed mode
screenshot.watermark = "Alt+K" ; Toggle product/company watermark for official screenshots
wireframe = "Alt+Shift+W" ; Toggle wireframe mode
silhouettes = "Alt+Shift+S" ; Toggle unit silhouettes
; > DIALOG HOTKEYS
summary = "Ctrl+Tab" ; Toggle in-game summary
lobby = "Alt+L" ; Show the multiplayer lobby in a dialog window.
structree = "Alt+Shift+T" ; Show structure tree
civinfo = "Alt+Shift+H" ; Show civilization info
; > CLIPBOARD CONTROLS
copy = "Ctrl+C" ; Copy to clipboard
paste = "Ctrl+V" ; Paste from clipboard
cut = "Ctrl+X" ; Cut selected text and copy to the clipboard
; > CONSOLE SETTINGS
console.toggle = BackQuote, F9 ; Open/close console
; > OVERLAY KEYS
fps.toggle = "Alt+F" ; Toggle frame counter
realtime.toggle = "Alt+T" ; Toggle current display of computer time
timeelapsedcounter.toggle = "F12" ; Toggle time elapsed counter
ceasefirecounter.toggle = "" ; Toggle ceasefire counter
; > HOTKEYS ONLY
chat = Return ; Toggle chat window
teamchat = "T" ; Toggle chat window in team chat mode
privatechat = "L" ; Toggle chat window and select the previous private chat partner
; > QUICKSAVE
quicksave = "Shift+F5"
quickload = "Shift+F8"
[hotkey.camera]
reset = "R" ; Reset camera rotation to default.
follow = "F" ; Follow the first unit in the selection
rallypointfocus = "" ; Focus the camera on the rally point of the selected building
lastattackfocus = "Space" ; Focus the camera on the last notified attack
zoom.in = Plus, NumPlus ; Zoom camera in (continuous control)
zoom.out = Minus, NumMinus ; Zoom camera out (continuous control)
zoom.wheel.in = WheelUp ; Zoom camera in (stepped control)
zoom.wheel.out = WheelDown ; Zoom camera out (stepped control)
rotate.up = "Ctrl+UpArrow", "Ctrl+W" ; Rotate camera to look upwards
rotate.down = "Ctrl+DownArrow", "Ctrl+S" ; Rotate camera to look downwards
rotate.cw = "Ctrl+LeftArrow", "Ctrl+A", Q ; Rotate camera clockwise around terrain
rotate.ccw = "Ctrl+RightArrow", "Ctrl+D", E ; Rotate camera anticlockwise around terrain
rotate.wheel.cw = "Shift+WheelUp", MouseX1 ; Rotate camera clockwise around terrain (stepped control)
rotate.wheel.ccw = "Shift+WheelDown", MouseX2 ; Rotate camera anticlockwise around terrain (stepped control)
pan = MouseMiddle ; Enable scrolling by moving mouse
left = A, LeftArrow ; Scroll or rotate left
right = D, RightArrow ; Scroll or rotate right
up = W, UpArrow ; Scroll or rotate up/forwards
down = S, DownArrow ; Scroll or rotate down/backwards
scroll.speed.increase = "Ctrl+Shift+S" ; Increase scroll speed
scroll.speed.decrease = "Ctrl+Alt+S" ; Decrease scroll speed
rotate.speed.increase = "Ctrl+Shift+R" ; Increase rotation speed
rotate.speed.decrease = "Ctrl+Alt+R" ; Decrease rotation speed
zoom.speed.increase = "Ctrl+Shift+Z" ; Increase zoom speed
zoom.speed.decrease = "Ctrl+Alt+Z" ; Decrease zoom speed
[hotkey.camera.jump]
1 = F5 ; Jump to position N
2 = F6
3 = F7
4 = F8
;5 =
;6 =
;7 =
;8 =
;9 =
;10 =
[hotkey.camera.jump.set]
1 = "Ctrl+F5" ; Set jump position N
2 = "Ctrl+F6"
3 = "Ctrl+F7"
4 = "Ctrl+F8"
;5 =
;6 =
;7 =
;8 =
;9 =
;10 =
[hotkey.profile]
toggle = "F11" ; Enable/disable real-time profiler
save = "Shift+F11" ; Save current profiler data to logs/profile.txt
[hotkey.profile2]
toggle = "Ctrl+F11" ; Enable/disable HTTP/GPU modes for new profiler
[hotkey.selection]
cancel = Esc ; Un-select all units and cancel building placement
add = Shift ; Add units to selection
militaryonly = Alt ; Add only military units to the selection
nonmilitaryonly = "Alt+Y" ; Add only non-military units to the selection
idleonly = "I" ; Select only idle units
woundedonly = "O" ; Select only wounded units
remove = Ctrl ; Remove units from selection
idlebuilder = Semicolon ; Select next idle builder
idleworker = Period, NumDecimal ; Select next idle worker
idlewarrior = Slash, NumDivide ; Select next idle warrior
idleunit = BackSlash ; Select next idle unit
offscreen = Alt ; Include offscreen units in selection
singleselection = "" ; Modifier to select units individually, as opposed to per formation.
[hotkey.selection.group.add]
1 = "Shift+1", "Shift+Num1"
2 = "Shift+2", "Shift+Num2"
3 = "Shift+3", "Shift+Num3"
4 = "Shift+4", "Shift+Num4"
5 = "Shift+5", "Shift+Num5"
6 = "Shift+6", "Shift+Num6"
7 = "Shift+7", "Shift+Num7"
8 = "Shift+8", "Shift+Num8"
9 = "Shift+9", "Shift+Num9"
10 = "Shift+0", "Shift+Num0"
[hotkey.selection.group.save]
1 = "Ctrl+1", "Ctrl+Num1"
2 = "Ctrl+2", "Ctrl+Num2"
3 = "Ctrl+3", "Ctrl+Num3"
4 = "Ctrl+4", "Ctrl+Num4"
5 = "Ctrl+5", "Ctrl+Num5"
6 = "Ctrl+6", "Ctrl+Num6"
7 = "Ctrl+7", "Ctrl+Num7"
8 = "Ctrl+8", "Ctrl+Num8"
9 = "Ctrl+9", "Ctrl+Num9"
10 = "Ctrl+0", "Ctrl+Num0"
[hotkey.selection.group.select]
1 = 1, Num1
2 = 2, Num2
3 = 3, Num3
4 = 4, Num4
5 = 5, Num5
6 = 6, Num6
7 = 7, Num7
8 = 8, Num8
9 = 9, Num9
10 = 0, Num0
[hotkey.gamesetup]
mapbrowser.open = "M"
[hotkey.session]
kill = Delete, Backspace ; Destroy selected units
stop = "H" ; Stop the current action
backtowork = "Y" ; The unit will go back to work
unload = "U" ; Unload garrisoned units when a building/mechanical unit is selected
unloadturrets = "U" ; Unload turreted units.
leaveturret = "U" ; Leave turret point.
move = "" ; Modifier to move to a point instead of another action (e.g. gather)
capture = "C" ; Modifier to capture instead of another action (e.g. attack)
attack = "" ; Modifier to attack instead of another action (e.g. capture)
attackmove = Ctrl ; Modifier to attackmove when clicking on a point
attackmoveUnit = "Ctrl+Q" ; Modifier to attackmove targeting only units when clicking on a point
garrison = Ctrl ; Modifier to garrison when clicking on building
occupyturret = Ctrl ; Modifier to occupy a turret when clicking on a turret holder.
autorallypoint = Ctrl ; Modifier to set the rally point on the building itself
guard = "G" ; Modifier to escort/guard when clicking on unit/building
patrol = "P" ; Modifier to patrol a unit
repair = "J" ; Modifier to repair when clicking on building/mechanical unit
queue = Shift ; Modifier to queue unit orders instead of replacing
pushorderfront = "" ; Modifier to push unit orders to the front instead of replacing.
orderone = Alt ; Modifier to order only one entity in selection.
batchtrain = Shift ; Modifier to train units in batches
massbarter = Shift ; Modifier to barter a bunch of resources
masstribute = Shift ; Modifier to tribute a bunch of resources
noconfirmation = Shift ; Do not ask confirmation when deleting a building/unit
fulltradeswap = Shift ; Modifier to put the desired trade resource to 100%
unloadtype = Shift ; Modifier to unload all units of type
deselectgroup = Ctrl ; Modifier to deselect units when clicking group icon, instead of selecting
rotate.cw = RightBracket ; Rotate building placement preview clockwise
rotate.ccw = LeftBracket ; Rotate building placement preview anticlockwise
snaptoedges = Ctrl ; Modifier to align new structures with nearby existing structure
toggledefaultformation = "" ; Switch between null default formation and the last default formation used (defaults to "box")
flare = K ; Modifier to send a flare to your allies
flareactivate = "" ; Modifier to activate the mode to send a flare to your allies
calltoarms = "" ; Modifier to call the selected units to arms.
; Overlays
showstatusbars = Tab ; Toggle display of status bars
devcommands.toggle = "Alt+D" ; Toggle developer commands panel
highlightguarding = PageDown ; Toggle highlight of guarding units
highlightguarded = PageUp ; Toggle highlight of guarded units
diplomacycolors = "Alt+X" ; Toggle diplomacy colors
toggleattackrange = "Alt+C" ; Toggle display of attack range overlays of selected defensive structures
toggleaurasrange = "Alt+V" ; Toggle display of aura range overlays of selected units and structures
togglehealrange = "Alt+B" ; Toggle display of heal range overlays of selected units
[hotkey.session.gui]
toggle = "Alt+G" ; Toggle visibility of session GUI
menu.toggle = "F10" ; Toggle in-game menu
diplomacy.toggle = "Ctrl+H" ; Toggle in-game diplomacy page
barter.toggle = "Ctrl+B" ; Toggle in-game barter/trade page
objectives.toggle = "Ctrl+O" ; Toggle in-game objectives page
tutorial.toggle = "Ctrl+P" ; Toggle in-game tutorial panel
[hotkey.session.savedgames]
delete = Delete, Backspace ; Delete the selected saved game asking confirmation
noconfirmation = Shift ; Do not ask confirmation when deleting a game
[hotkey.session.queueunit] ; > UNIT TRAINING
1 = "Z" ; add first unit type to queue
2 = "X" ; add second unit type to queue
3 = "C" ; add third unit type to queue
4 = "V" ; add fourth unit type to queue
5 = "B" ; add fifth unit type to queue
6 = "N" ; add sixth unit type to queue
7 = "M" ; add seventh unit type to queue
8 = Comma ; add eighth unit type to queue
[hotkey.session.timewarp]
fastforward = "Ctrl+Space" ; If timewarp mode enabled, speed up the game
rewind = "Shift+Backspace" ; If timewarp mode enabled, go back to earlier point in the game
[hotkey.tab]
next = "Tab", "Alt+S" ; Show the next tab
prev = "Shift+Tab", "Alt+W" ; Show the previous tab
[hotkey.text] ; > GUI TEXTBOX HOTKEYS
delete.left = "Ctrl+Backspace" ; Delete word to the left of cursor
delete.right = "Ctrl+Del" ; Delete word to the right of cursor
move.left = "Ctrl+LeftArrow" ; Move cursor to start of word to the left of cursor
move.right = "Ctrl+RightArrow" ; Move cursor to start of word to the right of cursor
[gui]
cursorblinkrate = 0.5 ; Cursor blink rate in seconds (0.0 to disable blinking)
scale = 1.0 ; GUI scaling factor, for improved compatibility with 4K displays
[gui.gamesetup]
enabletips = true ; Enable/Disable tips during gamesetup (for newcomers)
assignplayers = everyone ; Whether to assign joining clients to free playerslots. Possible values: everyone, buddies, disabled.
aidifficulty = 3 ; Difficulty level, from 0 (easiest) to 5 (hardest)
aibehavior = "random" ; Default behavior of the AI (random, balanced, aggressive or defensive)
settingsslide = true ; Enable/Disable settings panel slide
[gui.loadingscreen]
progressdescription = false ; Whether to display the progress percent or a textual description
[gui.session]
dragdelta = 4 ; Number of pixels the mouse can move before the action is considered a drag
camerajump.threshold = 40 ; How close do we have to be to the actual location in order to jump back to the previous one?
timeelapsedcounter = false ; Show the game duration in the top right corner
ceasefirecounter = false ; Show the remaining ceasefire time in the top right corner
batchtrainingsize = 5 ; Number of units to be trained per batch by default (when pressing the hotkey)
scrollbatchratio = 1 ; Number of times you have to scroll to increase/decrease the batchsize by 1
flarelifetime = 6 ; How long the flare markers on the minimap are displayed in seconds
woundedunithotkeythreshold = 33 ; The wounded unit hotkey considers the selected units as wounded if their health percentage falls below this number
attackrange = true ; Display attack range overlays of selected defensive structures
aurasrange = true ; Display aura range overlays of selected units and structures
healrange = true ; Display heal range overlays of selected units
rankabovestatusbar = true ; Show rank icons above status bars
experiencestatusbar = true ; Show an experience status bar above each selected unit
respoptooltipsort = 0 ; Sorting players in the resources and population tooltip by value (0 - no sort, -1 - ascending, 1 - descending)
snaptoedges = "disabled" ; Possible values: disabled, enabled.
snaptoedgesdistancethreshold = 15 ; On which distance we don't snap to edges
disjointcontrolgroups = "true" ; Whether control groups are disjoint sets or entities can be in multiple control groups at the same time.
defaultformation = "special/formations/box" ; For walking orders, automatically put units into this formation if they don't have one already.
formationwalkonly = "true" ; Formations are disabled when giving gather/attack/... orders.
howtoshownames = 0 ; Whether the specific names are shown by default, as opposed to the generic names, and whether the secondary names are shown. (0 - show both; specific names primary, 1 - show both; generic names primary, 2 - show only specific names, 3 - show only generic names)
selectformationasone = "true" ; Whether to select formations as a whole by default.
[gui.session.minimap]
; Icons that are displayed for some entities on a minimap.
icons.enabled = "true"
icons.opacity = 1.0
icons.sizescale = 1.0
blinkduration = 1.7 ; The blink duration while pinging
pingduration = 50.0 ; The duration for which an entity will be pinged after an attack notification
[gui.session.notifications]
attack = true ; Show a chat notification if you are attacked by another player
tribute = true ; Show a chat notification if an ally tributes resources to another team member if teams are locked, and all tributes in observer mode
barter = true ; Show a chat notification to observers when a player bartered resources
phase = completed ; Show a chat notification if you or an ally have started, aborted or completed a new phase, and phases of all players in observer mode. Possible values: none, completed, all.
[gui.splashscreen]
enable = true ; Enable/disable the splashscreen
version = 0 ; Splashscreen version (date of last modification). By default, 0 to force splashscreen to appear at first launch
[gui.session.diplomacycolors]
self = "21 55 149" ; Color of your units when diplomacy colors are enabled
ally = "86 180 31" ; Color of allies when diplomacy colors are enabled
neutral = "231 200 5" ; Color of neutral players when diplomacy colors are enabled
enemy = "150 20 20" ; Color of enemies when diplomacy colors are enabled
[joystick] ; EXPERIMENTAL: joystick/gamepad settings
enable = false
deadzone = 8192
[chat]
timestamp = true ; Show at which time chat messages have been sent
[chat.session]
extended = true ; Whether to display the chat history
[lobby]
history = 0 ; Number of past messages to display on join
room = "arena27" ; Default MUC room to join
server = "lobby.wildfiregames.com" ; Address of lobby server
tls = true ; Whether to use TLS encryption when connecting to the server.
verify_certificate = false ; Whether to reject connecting to the lobby if the TLS certificate is invalid (TODO: wait for Gloox GnuTLS trust implementation to be fixed)
terms_url = "https://trac.wildfiregames.com/browser/ps/trunk/binaries/data/mods/public/gui/prelobby/common/terms/"; Allows the user to save the text and print the terms
terms_of_service = "0" ; Version (hash) of the Terms of Service that the user has accepted
terms_of_use = "0" ; Version (hash) of the Terms of Use that the user has accepted
privacy_policy = "0" ; Version (hash) of the Privacy Policy that the user has accepted
xpartamupp = "wfgbot27" ; Name of the server-side XMPP-account that manages games
echelon = "echelon27" ; Name of the server-side XMPP-account that manages ratings
buddies = "," ; Comma separated list of playernames that the current user has marked as buddies
rememberpassword = true ; Whether to store the encrypted password in the user config
[lobby.columns]
gamerating = false ; Show the average rating of the participating players in a column of the gamelist
[lobby.stun]
enabled = true ; The STUN protocol allows hosting games without configuring the firewall and router.
; If STUN is disabled, the game relies on direct connection, UPnP and port forwarding.
server = "lobby.wildfiregames.com" ; Address of the STUN server.
port = 3478 ; Port of the STUN server.
delay = 200 ; Duration in milliseconds that is waited between STUN messages.
; Smaller numbers speed up joins but also become less stable.
[mod]
enabledmods = "mod public"
[modio]
public_key = "RWQv2alKl8D0zMDJR766jpYvPy4u3y77HL/iKb/lsT1Fnf6ezoMb2x8+" ; Public key corresponding to the private key valid mods are signed with
disclaimer = "0" ; Version (hash) of the Disclaimer that the user has accepted
[modio.v1]
baseurl = "https://api.mod.io/v1"
api_key = "23df258a71711ea6e4b50893acc1ba55"
name_id = "0ad"
[network]
duplicateplayernames = false ; Rename joining player to "User (2)" if "User" is already connected, otherwise prohibit join.
lateobservers = everyone ; Allow observers to join the game after it started. Possible values: everyone, buddies, disabled.
observerlimit = 8 ; Prevent further observer joins in running games if this limit is reached
observermaxlag = -1 ; Make clients wait for observers if they lag more than X turns behind. -1 means "never wait for observers".
autocatchup = true ; Auto-accelerate the sim rate if lagging behind (as an observer).
enetmtu = 1372 ; Lower ENet protocol MTU in case packets get further fragmented on the UDP layer which may cause drops.
[overlay]
fps = "false" ; Show frames per second in top right corner
realtime = "false" ; Show current system time in top right corner
netwarnings = "true" ; Show warnings if the network connection is bad
[profiler2]
autoenable = false ; Enable HTTP server output at startup (default off for security/performance)
gpu.arb.enable = true ; Allow GL_ARB_timer_query timing mode when available.
[rlinterface]
address = "127.0.0.1:6000"
[sound]
mastergain = 0.9
musicgain = 0.2
ambientgain = 0.6
actiongain = 0.7
uigain = 0.7
mindistance = 1
maxdistance = 350
maxstereoangle = 0.62 ; About PI/5 radians
[sound.notify]
nick = true ; Play a sound when someone mentions your name in the lobby or game
gamesetup.join = false ; Play a sound when a new client joins the game setup
[tinygettext]
debug = false ; Print error messages each time a translation for an English string is not found.
[userreport] ; Opt-in online user reporting system
url_upload = "https://feedback.wildfiregames.com/report/upload/v1/" ; URL where UserReports are uploaded to
url_publication = "https://feedback.wildfiregames.com/" ; URL where UserReports were analyzed and published
url_terms = "https://trac.wildfiregames.com/browser/ps/trunk/binaries/data/mods/public/gui/userreport/Terms_and_Conditions.txt"; Allows the user to save the text and print the terms
terms = "0" ; Version (hash) of the UserReporter Terms that the user has accepted
[view] ; Camera control settings
scroll.speed = 120.0
scroll.speed.modifier = 1.05 ; Multiplier for changing scroll speed
scroll.mouse.detectdistance = 3
rotate.x.speed = 1.2
rotate.x.min = 28.0
rotate.x.max = 60.0
rotate.x.default = 35.0
rotate.y.speed = 2.0
rotate.y.speed.wheel = 0.45
rotate.y.default = 0.0
rotate.speed.modifier = 1.05 ; Multiplier for changing rotation speed
drag.speed = 0.5
zoom.speed = 256.0
zoom.speed.wheel = 32.0
zoom.min = 50.0
zoom.max = 200.0
zoom.default = 120.0
zoom.speed.modifier = 1.05 ; Multiplier for changing zoom speed
pos.smoothness = 0.1
zoom.smoothness = 0.4
rotate.x.smoothness = 0.5
rotate.y.smoothness = 0.3
near = 2.0 ; Near plane distance
far = 4096.0 ; Far plane distance
fov = 45.0 ; Field of view (degrees), lower is narrow, higher is wide
height.smoothness = 0.5
height.min = 16
Index: ps/trunk/binaries/data/mods/mod/shaders/effects/compute_downscale.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/effects/compute_downscale.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/effects/compute_downscale.xml (revision 28010)
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/effects/compute_downscale.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/effects/compute_rcas.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/effects/compute_rcas.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/effects/compute_rcas.xml (revision 28010)
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/effects/compute_rcas.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/effects/compute_upscale_fsr.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/effects/compute_upscale_fsr.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/effects/compute_upscale_fsr.xml (revision 28010)
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/effects/compute_upscale_fsr.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_bilinear.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_bilinear.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_bilinear.xml (revision 28010)
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_bilinear.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_nearest.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_nearest.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_nearest.xml (revision 28010)
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/effects/upscale_nearest.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/common/compute.h
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/common/compute.h (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/common/compute.h (revision 28010)
@@ -0,0 +1,22 @@
+#ifndef INCLUDED_COMMON_COMPUTE
+#define INCLUDED_COMMON_COMPUTE
+
+#include "common/descriptor_indexing.h"
+#include "common/texture.h"
+#include "common/uniform.h"
+
+#if STAGE_COMPUTE
+
+#if USE_SPIRV
+#define STORAGE_2D(LOCATION, FORMAT, NAME) \
+ layout(set = 2, binding = LOCATION, FORMAT) uniform image2D NAME
+#else
+// OpenGL doesn't have sets, so only the binding slot is specified here.
+// NOTE(review): no offset is added to LOCATION in this macro - confirm that is intended.
+#define STORAGE_2D(LOCATION, FORMAT, NAME) \
+ layout(binding = LOCATION, FORMAT) uniform image2D NAME
+#endif
+
+#endif // STAGE_COMPUTE
+
+#endif // INCLUDED_COMMON_COMPUTE
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/common/compute.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/common/descriptor_indexing.h
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/common/descriptor_indexing.h (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/common/descriptor_indexing.h (revision 28010)
@@ -0,0 +1,12 @@
+#ifndef INCLUDED_COMMON_DESCRIPTOR_INDEXING
+#define INCLUDED_COMMON_DESCRIPTOR_INDEXING
+
+#if USE_SPIRV && USE_DESCRIPTOR_INDEXING
+#extension GL_EXT_nonuniform_qualifier : enable
+const int DESCRIPTOR_INDEXING_SET_SIZE = 16384;
+layout (set = 0, binding = 0) uniform sampler2D textures2D[DESCRIPTOR_INDEXING_SET_SIZE];
+layout (set = 0, binding = 1) uniform samplerCube texturesCube[DESCRIPTOR_INDEXING_SET_SIZE];
+layout (set = 0, binding = 2) uniform sampler2DShadow texturesShadow[DESCRIPTOR_INDEXING_SET_SIZE];
+#endif // USE_SPIRV && USE_DESCRIPTOR_INDEXING
+
+#endif // INCLUDED_COMMON_DESCRIPTOR_INDEXING
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/common/descriptor_indexing.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/common/fragment.h
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/common/fragment.h (revision 28009)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/common/fragment.h (revision 28010)
@@ -1,35 +1,28 @@
#ifndef INCLUDED_COMMON_FRAGMENT
#define INCLUDED_COMMON_FRAGMENT
+#include "common/descriptor_indexing.h"
#include "common/texture.h"
#include "common/uniform.h"
#if USE_SPIRV
-#if USE_DESCRIPTOR_INDEXING
-#extension GL_EXT_nonuniform_qualifier : enable
-const int DESCRIPTOR_INDEXING_SET_SIZE = 16384;
-layout (set = 0, binding = 0) uniform sampler2D textures2D[DESCRIPTOR_INDEXING_SET_SIZE];
-layout (set = 0, binding = 1) uniform samplerCube texturesCube[DESCRIPTOR_INDEXING_SET_SIZE];
-layout (set = 0, binding = 2) uniform sampler2DShadow texturesShadow[DESCRIPTOR_INDEXING_SET_SIZE];
-#endif // USE_DESCRIPTOR_INDEXING
-
layout (location = 0) out vec4 fragmentColor;
#define OUTPUT_FRAGMENT_SINGLE_COLOR(COLOR) \
fragmentColor = COLOR
#define OUTPUT_FRAGMENT_COLOR(LOCATION, COLOR) \
gl_FragData[LOCATION] = COLOR
#else // USE_SPIRV
#define OUTPUT_FRAGMENT_SINGLE_COLOR(COLOR) \
gl_FragColor = COLOR
#define OUTPUT_FRAGMENT_COLOR(LOCATION, COLOR) \
gl_FragData[LOCATION] = COLOR
#endif // USE_SPIRV
#endif // INCLUDED_COMMON_FRAGMENT
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/common/uniform.h
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/common/uniform.h (revision 28009)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/common/uniform.h (revision 28010)
@@ -1,93 +1,93 @@
#ifndef INCLUDED_COMMON_UNIFORM
#define INCLUDED_COMMON_UNIFORM
#if USE_SPIRV
#if USE_DESCRIPTOR_INDEXING
#define BEGIN_DRAW_TEXTURES struct DrawTextures {
#define END_DRAW_TEXTURES };
#define NO_DRAW_TEXTURES uint padding; // We can't have empty struct in GLSL.
#define TEXTURE_2D(LOCATION, NAME) uint NAME;
#define TEXTURE_2D_SHADOW(LOCATION, NAME) uint NAME;
#define TEXTURE_CUBE(LOCATION, NAME) uint NAME;
#define GET_DRAW_TEXTURE_2D(NAME) \
textures2D[drawTextures.NAME]
#define GET_DRAW_TEXTURE_2D_SHADOW(NAME) \
texturesShadow[drawTextures.NAME]
#define GET_DRAW_TEXTURE_CUBE(NAME) \
texturesCube[drawTextures.NAME]
#else // USE_DESCRIPTOR_INDEXING
#define BEGIN_DRAW_TEXTURES
#define END_DRAW_TEXTURES
#define NO_DRAW_TEXTURES
-#if STAGE_FRAGMENT
+#if STAGE_FRAGMENT || STAGE_COMPUTE
#define TEXTURE_2D(LOCATION, NAME) \
layout (set = 1, binding = LOCATION) uniform sampler2D NAME;
#define TEXTURE_2D_SHADOW(LOCATION, NAME) \
layout (set = 1, binding = LOCATION) uniform sampler2DShadow NAME;
#define TEXTURE_CUBE(LOCATION, NAME) \
layout (set = 1, binding = LOCATION) uniform samplerCube NAME;
#else
#define TEXTURE_2D(LOCATION, NAME)
#define TEXTURE_2D_SHADOW(LOCATION, NAME)
#define TEXTURE_CUBE(LOCATION, NAME)
#endif
#define GET_DRAW_TEXTURE_2D(NAME) NAME
#define GET_DRAW_TEXTURE_2D_SHADOW(NAME) NAME
#define GET_DRAW_TEXTURE_CUBE(NAME) NAME
#endif // USE_DESCRIPTOR_INDEXING
#if USE_DESCRIPTOR_INDEXING
#define BEGIN_DRAW_UNIFORMS layout (push_constant) uniform DrawUniforms {
#define END_DRAW_UNIFORMS DrawTextures drawTextures; };
#define BEGIN_MATERIAL_UNIFORMS layout (std140, set = 1, binding = 0) uniform MaterialUniforms {
#define END_MATERIAL_UNIFORMS };
#else
#define BEGIN_DRAW_UNIFORMS layout (push_constant) uniform DrawUniforms {
#define END_DRAW_UNIFORMS };
#define BEGIN_MATERIAL_UNIFORMS layout (std140, set = 0, binding = 0) uniform MaterialUniforms {
#define END_MATERIAL_UNIFORMS };
#endif
#define UNIFORM(TYPE, NAME) \
TYPE NAME;
#else // USE_SPIRV
#define BEGIN_DRAW_TEXTURES
#define END_DRAW_TEXTURES
#define NO_DRAW_TEXTURES
-#if STAGE_FRAGMENT
+#if STAGE_FRAGMENT || STAGE_COMPUTE
#define TEXTURE_2D(LOCATION, NAME) \
uniform sampler2D NAME;
#define TEXTURE_2D_SHADOW(LOCATION, NAME) \
uniform sampler2DShadow NAME;
#define TEXTURE_CUBE(LOCATION, NAME) \
uniform samplerCube NAME;
#else
#define TEXTURE_2D(LOCATION, NAME)
#define TEXTURE_2D_SHADOW(LOCATION, NAME)
#define TEXTURE_CUBE(LOCATION, NAME)
#endif
#define GET_DRAW_TEXTURE_2D(NAME) \
NAME
#define GET_DRAW_TEXTURE_2D_SHADOW(NAME) \
NAME
#define GET_DRAW_TEXTURE_CUBE(NAME) \
NAME
#define BEGIN_DRAW_UNIFORMS
#define END_DRAW_UNIFORMS
#define BEGIN_MATERIAL_UNIFORMS
#define END_MATERIAL_UNIFORMS
#define UNIFORM(TYPE, NAME) \
uniform TYPE NAME;
#endif // USE_SPIRV
#endif // INCLUDED_COMMON_UNIFORM
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.cs
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.cs (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.cs (revision 28010)
@@ -0,0 +1,23 @@
+#version 430
+
+#include "common/compute.h"
+
+BEGIN_DRAW_TEXTURES
+ TEXTURE_2D(0, inTex)
+END_DRAW_TEXTURES
+
+BEGIN_DRAW_UNIFORMS
+ UNIFORM(vec4, screenSize)
+END_DRAW_UNIFORMS
+
+STORAGE_2D(0, rgba8, outTex);
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+void main() // Writes one outTex texel per invocation, sampled from inTex.
+{
+ ivec2 position = ivec2(gl_GlobalInvocationID.xy); // Output texel handled by this invocation.
+ if (any(greaterThanEqual(position, ivec2(screenSize.zw)))) // Skip invocations at or beyond screenSize.zw on either axis.
+ return;
+ vec2 uv = (vec2(position) + vec2(0.5, 0.5)) / screenSize.zw; // Texel center divided by screenSize.zw gives the sampling coordinate.
+ imageStore(outTex, position, texture(GET_DRAW_TEXTURE_2D(inTex), uv));
+}
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.cs
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.xml (revision 28010)
@@ -0,0 +1,6 @@
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_downscale.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.cs
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.cs (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.cs (revision 28010)
@@ -0,0 +1,50 @@
+#version 430
+
+#include "common/compute.h"
+
+BEGIN_DRAW_TEXTURES
+ TEXTURE_2D(0, inTex)
+END_DRAW_TEXTURES
+
+BEGIN_DRAW_UNIFORMS
+ UNIFORM(float, sharpness)
+END_DRAW_UNIFORMS
+
+STORAGE_2D(0, rgba8, outTex);
+
+#define A_GPU 1
+#define A_GLSL 1
+#define FSR_RCAS_DENOISE 1
+
+// TODO: support 16-bit floats.
+#include "ffx_a.h"
+
+#define FSR_RCAS_F 1
+AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(GET_DRAW_TEXTURE_2D(inTex), ASU2(p), 0); } // Fetches texel p of inTex at level 0; NOTE(review): presumably consumed by ffx_fsr1.h - confirm.
+void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} // Empty body: r, g and b are left unmodified.
+
+#include "ffx_fsr1.h"
+
+void CurrFilter(AU2 pos) // Runs FsrRcasF for pos and stores the RGB result with alpha 1 into outTex.
+{
+ AU4 const0;
+ FsrRcasCon(const0, sharpness); // NOTE(review): presumably fills const0 from the sharpness uniform; helper is not defined in this file.
+
+ AF3 c;
+ FsrRcasF(c.r, c.g, c.b, pos, const0);
+ imageStore(outTex, ASU2(pos), AF4(c, 1));
+}
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+void main() // Calls CurrFilter at four positions per invocation.
+{
+ // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+ AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); // Workgroup origin is its ID times 16 in x and y.
+ CurrFilter(gxy); // Offset (0, 0) from the remapped position.
+ gxy.x += 8u;
+ CurrFilter(gxy); // Offset (8, 0).
+ gxy.y += 8u;
+ CurrFilter(gxy); // Offset (8, 8).
+ gxy.x -= 8u;
+ CurrFilter(gxy); // Offset (0, 8).
+}
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.cs
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.xml (revision 28010)
@@ -0,0 +1,6 @@
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_rcas.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.cs
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.cs (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.cs (revision 28010)
@@ -0,0 +1,54 @@
+#version 430
+
+#include "common/compute.h"
+
+BEGIN_DRAW_TEXTURES
+ TEXTURE_2D(0, inTex)
+END_DRAW_TEXTURES
+
+BEGIN_DRAW_UNIFORMS
+ UNIFORM(vec4, screenSize)
+END_DRAW_UNIFORMS
+
+STORAGE_2D(0, rgba8, outTex);
+
+#define A_GPU 1
+#define A_GLSL 1
+
+// TODO: support 16-bit floats.
+#include "ffx_a.h"
+
+#define FSR_EASU_F 1
+AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(GET_DRAW_TEXTURE_2D(inTex), p, 0); return res; } // Gathers component 0 of inTex around p.
+AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(GET_DRAW_TEXTURE_2D(inTex), p, 1); return res; } // Gathers component 1 of inTex around p.
+AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(GET_DRAW_TEXTURE_2D(inTex), p, 2); return res; } // Gathers component 2 of inTex around p.
+
+#include "ffx_fsr1.h"
+
+void CurrFilter(AU2 pos) // Runs FsrEasuF for pos and stores the RGB result with alpha 1 into outTex.
+{
+ uvec4 const0, const1, const2, const3;
+ FsrEasuCon(
+ const0, const1, const2, const3,
+ screenSize.x, screenSize.y, // NOTE(review): presumably the input viewport size - confirm against ffx_fsr1.h.
+ screenSize.x, screenSize.y, // NOTE(review): presumably the input image size; same values as the line above.
+ screenSize.z, screenSize.w); // NOTE(review): presumably the output size - confirm.
+
+ AF3 c;
+ FsrEasuF(c, pos, const0, const1, const2, const3);
+ imageStore(outTex, ASU2(pos), AF4(c, 1));
+}
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+void main() // Calls CurrFilter at four positions per invocation.
+{
+ // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+ AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); // Workgroup origin is its ID times 16 in x and y.
+ CurrFilter(gxy); // Offset (0, 0) from the remapped position.
+ gxy.x += 8u;
+ CurrFilter(gxy); // Offset (8, 0).
+ gxy.y += 8u;
+ CurrFilter(gxy); // Offset (8, 8).
+ gxy.x -= 8u;
+ CurrFilter(gxy); // Offset (0, 8).
+}
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.cs
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.xml (revision 28010)
@@ -0,0 +1,6 @@
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/compute_upscale_fsr.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/source/renderer/backend/PipelineState.h
===================================================================
--- ps/trunk/source/renderer/backend/PipelineState.h (revision 28009)
+++ ps/trunk/source/renderer/backend/PipelineState.h (revision 28010)
@@ -1,200 +1,216 @@
-/* Copyright (C) 2022 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see http://www.gnu.org/licenses/.
*/
#ifndef INCLUDED_RENDERER_BACKEND_PIPELINESTATE
#define INCLUDED_RENDERER_BACKEND_PIPELINESTATE
#include "graphics/Color.h"
#include "renderer/backend/CompareOp.h"
#include "renderer/backend/IDeviceObject.h"
#include "renderer/backend/IShaderProgram.h"
class CStr;
namespace Renderer
{
namespace Backend
{
// Operation applied to a stored stencil value; see SStencilOpState for the
// conditions that select an operation.
enum class StencilOp
{
// Keeps the current value.
KEEP,
// Sets the value to zero.
ZERO,
// Sets the value to reference.
REPLACE,
// Increments the value and clamps to the maximum representable unsigned
// value.
INCREMENT_AND_CLAMP,
// Decrements the value and clamps to zero.
DECREMENT_AND_CLAMP,
// Bitwise inverts the value.
INVERT,
// Increments the value and wraps it to zero when incrementing the maximum
// representable unsigned value.
INCREMENT_AND_WRAP,
// Decrements the value and wraps it to the maximum representable unsigned
// value when decrementing zero.
DECREMENT_AND_WRAP
};
// Stencil operations and comparison for one face orientation.
// The GL backend passes failOp, depthFailOp and passOp (in that order) to
// glStencilOp/glStencilOpSeparate.
struct SStencilOpState
{
StencilOp failOp;
StencilOp passOp;
StencilOp depthFailOp;
CompareOp compareOp;
};
// Depth and stencil part of a graphics pipeline state description.
struct SDepthStencilStateDesc
{
// Depth test toggle, its comparison and whether depth is written.
bool depthTestEnabled;
CompareOp depthCompareOp;
bool depthWriteEnabled;
// Stencil test toggle, masks and reference value.
bool stencilTestEnabled;
uint32_t stencilReadMask;
uint32_t stencilWriteMask;
uint32_t stencilReference;
// Separate stencil operation states for front and back faces.
SStencilOpState stencilFrontFace;
SStencilOpState stencilBackFace;
};
// Factor selectable for the source and destination terms of a blend
// equation (see SBlendStateDesc).
// TODO: add per constant description.
enum class BlendFactor
{
ZERO,
ONE,
SRC_COLOR,
ONE_MINUS_SRC_COLOR,
DST_COLOR,
ONE_MINUS_DST_COLOR,
SRC_ALPHA,
ONE_MINUS_SRC_ALPHA,
DST_ALPHA,
ONE_MINUS_DST_ALPHA,
CONSTANT_COLOR,
ONE_MINUS_CONSTANT_COLOR,
CONSTANT_ALPHA,
ONE_MINUS_CONSTANT_ALPHA,
SRC_ALPHA_SATURATE,
SRC1_COLOR,
ONE_MINUS_SRC1_COLOR,
SRC1_ALPHA,
ONE_MINUS_SRC1_ALPHA,
};
// Operation that combines the source and destination blend terms.
enum class BlendOp
{
ADD,
SUBTRACT,
REVERSE_SUBTRACT,
MIN,
MAX
};
// Bit flags selecting which color channels are written. They are combined
// with bitwise OR into SBlendStateDesc::colorWriteMask.
// Using a namespace instead of an enum allows using the same syntax while
// avoiding adding operator overloads and additional checks on casts.
namespace ColorWriteMask
{
constexpr uint8_t RED = 0x01;
constexpr uint8_t GREEN = 0x02;
constexpr uint8_t BLUE = 0x04;
constexpr uint8_t ALPHA = 0x08;
} // namespace ColorWriteMask
// Blending part of a graphics pipeline state description.
struct SBlendStateDesc
{
bool enabled;
// Factors and operation for the color channels.
BlendFactor srcColorBlendFactor;
BlendFactor dstColorBlendFactor;
BlendOp colorBlendOp;
// Factors and operation for the alpha channel.
BlendFactor srcAlphaBlendFactor;
BlendFactor dstAlphaBlendFactor;
BlendOp alphaBlendOp;
// Presumably the constant referenced by the CONSTANT_* blend factors.
CColor constant;
// Combination of ColorWriteMask bits.
uint8_t colorWriteMask;
};
// How polygons are rasterized: filled or as lines.
enum class PolygonMode
{
FILL,
LINE
};
// Which faces, if any, are culled.
enum class CullMode
{
NONE,
FRONT,
BACK
};
// Winding order that identifies a front-facing polygon.
enum class FrontFace
{
COUNTER_CLOCKWISE,
CLOCKWISE
};
// Rasterization part of a graphics pipeline state description.
struct SRasterizationStateDesc
{
PolygonMode polygonMode;
CullMode cullMode;
FrontFace frontFace;
// Depth bias toggle and its constant and slope factors.
bool depthBiasEnabled;
float depthBiasConstantFactor;
float depthBiasSlopeFactor;
};
// Full description of a graphics pipeline: the shader program plus the
// depth/stencil, blend and rasterization states.
struct SGraphicsPipelineStateDesc
{
// It's a backend client responsibility to keep the shader program alive
// while it's bound.
IShaderProgram* shaderProgram;
SDepthStencilStateDesc depthStencilState;
SBlendStateDesc blendState;
SRasterizationStateDesc rasterizationState;
};
// Description of a compute pipeline; it consists of the shader program only.
+struct SComputePipelineStateDesc
+{
+ // It's a backend client responsibility to keep the shader program alive
+ // while it's bound.
+ IShaderProgram* shaderProgram;
+};
+
// We don't provide additional helpers intentionally because all custom states
// should be described with a related shader and should be switched together.
SGraphicsPipelineStateDesc MakeDefaultGraphicsPipelineStateDesc();
// String-to-enum helpers, one per pipeline state enum.
// NOTE(review): the definitions are not visible here, so the accepted
// spellings and the handling of unknown strings should be checked there.
StencilOp ParseStencilOp(const CStr& str);
BlendFactor ParseBlendFactor(const CStr& str);
BlendOp ParseBlendOp(const CStr& str);
PolygonMode ParsePolygonMode(const CStr& str);
CullMode ParseCullMode(const CStr& str);
FrontFace ParseFrontFace(const CStr& str);
/**
 * A holder for precompiled graphics pipeline description.
 * Backends derive from this device object interface.
 */
class IGraphicsPipelineState : public IDeviceObject
{
public:
// Returns the shader program of the pipeline description.
virtual IShaderProgram* GetShaderProgram() const = 0;
};
+/**
+ * A holder for precompiled compute pipeline description.
+ */
+class IComputePipelineState : public IDeviceObject
+{
+public:
// Returns the shader program of the pipeline description.
+ virtual IShaderProgram* GetShaderProgram() const = 0;
+};
+
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_PIPELINESTATE
Index: ps/trunk/source/renderer/backend/dummy/DeviceCommandContext.cpp
===================================================================
--- ps/trunk/source/renderer/backend/dummy/DeviceCommandContext.cpp (revision 28009)
+++ ps/trunk/source/renderer/backend/dummy/DeviceCommandContext.cpp (revision 28010)
@@ -1,218 +1,239 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see http://www.gnu.org/licenses/.
*/
#include "precompiled.h"
#include "DeviceCommandContext.h"
#include "renderer/backend/dummy/Buffer.h"
#include "renderer/backend/dummy/Device.h"
#include "renderer/backend/dummy/Framebuffer.h"
#include "renderer/backend/dummy/ShaderProgram.h"
#include "renderer/backend/dummy/Texture.h"
namespace Renderer
{
namespace Backend
{
namespace Dummy
{
// static
std::unique_ptr CDeviceCommandContext::Create(CDevice* device)
{
std::unique_ptr deviceCommandContext(new CDeviceCommandContext());
deviceCommandContext->m_Device = device;
return deviceCommandContext;
}
// Construction and destruction are compiler-generated.
CDeviceCommandContext::CDeviceCommandContext() = default;
CDeviceCommandContext::~CDeviceCommandContext() = default;
// Returns the device passed to Create().
IDevice* CDeviceCommandContext::GetDevice()
{
return m_Device;
}
// Dummy backend: pipeline state changes are accepted and ignored.
void CDeviceCommandContext::SetGraphicsPipelineState(
IGraphicsPipelineState*)
{
}
+void CDeviceCommandContext::SetComputePipelineState(
+ IComputePipelineState*)
+{
+}
+
// Dummy backend: texture and buffer uploads are accepted and ignored. The
// UploadBufferFunction overloads never invoke the callback.
void CDeviceCommandContext::UploadTexture(
ITexture*, const Format, const void*, const size_t,
const uint32_t, const uint32_t)
{
}
void CDeviceCommandContext::UploadTextureRegion(
ITexture*, const Format, const void*, const size_t,
const uint32_t, const uint32_t, const uint32_t, const uint32_t,
const uint32_t, const uint32_t)
{
}
void CDeviceCommandContext::UploadBuffer(IBuffer*, const void*, const uint32_t)
{
}
void CDeviceCommandContext::UploadBuffer(IBuffer*, const UploadBufferFunction&)
{
}
void CDeviceCommandContext::UploadBufferRegion(
IBuffer*, const void*, const uint32_t, const uint32_t)
{
}
void CDeviceCommandContext::UploadBufferRegion(
IBuffer*, const uint32_t, const uint32_t, const UploadBufferFunction&)
{
}
// Dummy backend: debug labels and flushing do nothing.
void CDeviceCommandContext::BeginScopedLabel(const char*)
{
}
void CDeviceCommandContext::EndScopedLabel()
{
}
void CDeviceCommandContext::Flush()
{
}
// Dummy backend: framebuffer operations do nothing. ReadbackFramebufferSync
// leaves the destination memory untouched.
void CDeviceCommandContext::BlitFramebuffer(
IFramebuffer*, IFramebuffer*, const Rect&, const Rect&, const Sampler::Filter)
{
}
void CDeviceCommandContext::ResolveFramebuffer(IFramebuffer*, IFramebuffer*)
{
}
void CDeviceCommandContext::ClearFramebuffer(const bool, const bool, const bool)
{
}
void CDeviceCommandContext::BeginFramebufferPass(IFramebuffer*)
{
}
void CDeviceCommandContext::EndFramebufferPass()
{
}
void CDeviceCommandContext::ReadbackFramebufferSync(
const uint32_t, const uint32_t, const uint32_t, const uint32_t, void*)
{
}
// Dummy backend: scissor, viewport, vertex input and index buffer setters
// accept their arguments and ignore them.
void CDeviceCommandContext::SetScissors(const uint32_t, const Rect*)
{
}
void CDeviceCommandContext::SetViewports(const uint32_t, const Rect*)
{
}
void CDeviceCommandContext::SetVertexInputLayout(IVertexInputLayout*)
{
}
void CDeviceCommandContext::SetVertexBuffer(const uint32_t, IBuffer*, const uint32_t)
{
}
void CDeviceCommandContext::SetVertexBufferData(
const uint32_t, const void*, const uint32_t)
{
}
void CDeviceCommandContext::SetIndexBuffer(IBuffer*)
{
}
void CDeviceCommandContext::SetIndexBufferData(const void*, const uint32_t)
{
}
// Dummy backend: draw passes and draw calls do nothing.
void CDeviceCommandContext::BeginPass()
{
}
void CDeviceCommandContext::EndPass()
{
}
void CDeviceCommandContext::Draw(const uint32_t, const uint32_t)
{
}
void CDeviceCommandContext::DrawIndexed(const uint32_t, const uint32_t, const int32_t)
{
}
void CDeviceCommandContext::DrawInstanced(
const uint32_t, const uint32_t, const uint32_t, const uint32_t)
{
}
void CDeviceCommandContext::DrawIndexedInstanced(
const uint32_t, const uint32_t, const uint32_t, const uint32_t, const int32_t)
{
}
void CDeviceCommandContext::DrawIndexedInRange(
const uint32_t, const uint32_t, const uint32_t, const uint32_t)
{
}
// Dummy backend: compute passes and dispatches do nothing.
+void CDeviceCommandContext::BeginComputePass()
+{
+}
+
+void CDeviceCommandContext::EndComputePass()
+{
+}
+
+void CDeviceCommandContext::Dispatch(const uint32_t, const uint32_t, const uint32_t)
+{
+}
+
// Dummy backend: texture, storage texture and uniform bindings are accepted
// and ignored.
void CDeviceCommandContext::SetTexture(const int32_t, ITexture*)
{
}
+void CDeviceCommandContext::SetStorageTexture(const int32_t, ITexture*)
+{
+}
+
void CDeviceCommandContext::SetUniform(const int32_t, const float)
{
}
void CDeviceCommandContext::SetUniform(const int32_t, const float, const float)
{
}
void CDeviceCommandContext::SetUniform(
const int32_t, const float, const float, const float)
{
}
void CDeviceCommandContext::SetUniform(
const int32_t, const float, const float, const float, const float)
{
}
void CDeviceCommandContext::SetUniform(const int32_t, PS::span)
{
}
} // namespace Dummy
} // namespace Backend
} // namespace Renderer
Index: ps/trunk/source/renderer/backend/dummy/PipelineState.h
===================================================================
--- ps/trunk/source/renderer/backend/dummy/PipelineState.h (revision 28009)
+++ ps/trunk/source/renderer/backend/dummy/PipelineState.h (revision 28010)
@@ -1,67 +1,89 @@
-/* Copyright (C) 2022 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see http://www.gnu.org/licenses/.
*/
#ifndef INCLUDED_RENDERER_BACKEND_DUMMY_PIPELINESTATE
#define INCLUDED_RENDERER_BACKEND_DUMMY_PIPELINESTATE
#include "renderer/backend/PipelineState.h"
#include
#include
namespace Renderer
{
namespace Backend
{
namespace Dummy
{
class CDevice;
// Dummy backend graphics pipeline state: it only keeps a copy of the
// description it was created from.
class CGraphicsPipelineState final : public IGraphicsPipelineState
{
public:
~CGraphicsPipelineState() override = default;
IDevice* GetDevice() override;
// Returns the shader program stored in the description.
IShaderProgram* GetShaderProgram() const override { return m_Desc.shaderProgram; }
const SGraphicsPipelineStateDesc& GetDesc() const { return m_Desc; }
private:
// The constructor and Create() are private; CDevice is a friend so it can
// create instances.
friend class CDevice;
static std::unique_ptr Create(
CDevice* device, const SGraphicsPipelineStateDesc& desc);
CGraphicsPipelineState() = default;
CDevice* m_Device = nullptr;
SGraphicsPipelineStateDesc m_Desc{};
};
// Dummy backend compute pipeline state: it only keeps a copy of the
// description it was created from. Unlike CGraphicsPipelineState it has no
// GetDesc() accessor.
+class CComputePipelineState final : public IComputePipelineState
+{
+public:
+ ~CComputePipelineState() override = default;
+
+ IDevice* GetDevice() override;
+
// Returns the shader program stored in the description.
+ IShaderProgram* GetShaderProgram() const override { return m_Desc.shaderProgram; }
+
+private:
// The constructor and Create() are private; CDevice is a friend so it can
// create instances.
+ friend class CDevice;
+
+ static std::unique_ptr Create(
+ CDevice* device, const SComputePipelineStateDesc& desc);
+
+ CComputePipelineState() = default;
+
+ CDevice* m_Device = nullptr;
+
+ SComputePipelineStateDesc m_Desc{};
+};
+
} // namespace Dummy
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_DUMMY_PIPELINESTATE
Index: ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp
===================================================================
--- ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp (revision 28009)
+++ ps/trunk/source/renderer/backend/gl/DeviceCommandContext.cpp (revision 28010)
@@ -1,1346 +1,1416 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see http://www.gnu.org/licenses/.
*/
#include "precompiled.h"
#include "DeviceCommandContext.h"
#include "ps/CLogger.h"
#include "renderer/backend/gl/Buffer.h"
#include "renderer/backend/gl/Device.h"
#include "renderer/backend/gl/Framebuffer.h"
#include "renderer/backend/gl/Mapping.h"
#include "renderer/backend/gl/PipelineState.h"
#include "renderer/backend/gl/ShaderProgram.h"
#include "renderer/backend/gl/Texture.h"
#include
#include
#include
namespace Renderer
{
namespace Backend
{
namespace GL
{
namespace
{
// Two stencil operation states are equal only when every member matches.
bool operator==(const SStencilOpState& lhs, const SStencilOpState& rhs)
{
	if (lhs.failOp != rhs.failOp)
		return false;
	if (lhs.passOp != rhs.passOp)
		return false;
	if (lhs.depthFailOp != rhs.depthFailOp)
		return false;
	return lhs.compareOp == rhs.compareOp;
}
// Negation of the SStencilOpState equality above.
bool operator!=(const SStencilOpState& lhs, const SStencilOpState& rhs)
{
	const bool same = operator==(lhs, rhs);
	return !same;
}
// Two rectangles are equal when both the origin and the extent match.
bool operator==(
	const CDeviceCommandContext::Rect& lhs,
	const CDeviceCommandContext::Rect& rhs)
{
	if (lhs.x != rhs.x || lhs.y != rhs.y)
		return false;
	if (lhs.width != rhs.width)
		return false;
	return lhs.height == rhs.height;
}
// Negation of the Rect equality above.
bool operator!=(
	const CDeviceCommandContext::Rect& lhs,
	const CDeviceCommandContext::Rect& rhs)
{
	const bool same = operator==(lhs, rhs);
	return !same;
}
// Enables or disables writes to the depth buffer.
void ApplyDepthMask(const bool depthWriteEnabled)
{
	if (depthWriteEnabled)
		glDepthMask(GL_TRUE);
	else
		glDepthMask(GL_FALSE);
}
void ApplyColorMask(const uint8_t colorWriteMask)
{
glColorMask(
(colorWriteMask & ColorWriteMask::RED) != 0 ? GL_TRUE : GL_FALSE,
(colorWriteMask & ColorWriteMask::GREEN) != 0 ? GL_TRUE : GL_FALSE,
(colorWriteMask & ColorWriteMask::BLUE) != 0 ? GL_TRUE : GL_FALSE,
(colorWriteMask & ColorWriteMask::ALPHA) != 0 ? GL_TRUE : GL_FALSE);
}
// Forwards the stencil write mask to glStencilMask.
void ApplyStencilMask(const uint32_t stencilWriteMask)
{
glStencilMask(stencilWriteMask);
}
// Maps a buffer type to its GL binding target. Only vertex and index buffers
// have a target; upload and uniform buffers trigger a warning and, like any
// other value, fall back to GL_ARRAY_BUFFER.
GLenum BufferTypeToGLTarget(const CBuffer::Type type)
{
	if (type == CBuffer::Type::INDEX)
		return GL_ELEMENT_ARRAY_BUFFER;

	if (type == CBuffer::Type::UPLOAD || type == CBuffer::Type::UNIFORM)
	{
		debug_warn("Unsupported buffer type.");
	}

	return GL_ARRAY_BUFFER;
}
// Re-specifies the storage of the buffer bound to target, maps it for
// writing and calls uploadFunction with the mapped pointer advanced by
// dataOffset. The map/write/unmap sequence is repeated until unmapping
// succeeds; it is abandoned if mapping fails.
void UploadDynamicBufferRegionImpl(
const GLenum target, const uint32_t bufferSize,
const uint32_t dataOffset, const uint32_t dataSize,
const CDeviceCommandContext::UploadBufferFunction& uploadFunction)
{
// NOTE(review): this compares the offset with the region size, not with
// bufferSize; the callers visible here check
// dataOffset + dataSize <= buffer size themselves. Confirm the intent.
ENSURE(dataOffset < dataSize);
// Tell the driver that it can reallocate the whole VBO
glBufferDataARB(target, bufferSize, nullptr, GL_DYNAMIC_DRAW);
ogl_WarnIfError();
while (true)
{
// (In theory, glMapBufferRange with GL_MAP_INVALIDATE_BUFFER_BIT could be used
// here instead of glBufferData(..., NULL, ...) plus glMapBuffer(), but with
// current Intel Windows GPU drivers (as of 2015-01) it's much faster if you do
// the explicit glBufferData.)
void* mappedData = glMapBufferARB(target, GL_WRITE_ONLY);
if (mappedData == nullptr)
{
// This shouldn't happen unless we run out of virtual address space
LOGERROR("glMapBuffer failed");
break;
}
uploadFunction(static_cast(mappedData) + dataOffset);
if (glUnmapBufferARB(target) == GL_TRUE)
break;
// Unmap might fail on e.g. resolution switches, so just try again
// and hope it will eventually succeed
LOGMESSAGE("glUnmapBuffer failed, trying again...\n");
}
}
/**
 * In case we don't need a framebuffer content (because of the following clear
 * or overwriting by a shader) we might give a hint to a driver via
 * glInvalidateFramebuffer.
 *
 * The color and depthStencil flags select which attachments to invalidate;
 * an attachment is listed only if the framebuffer's attachment mask has it.
 */
void InvalidateFramebuffer(
CFramebuffer* framebuffer, const bool color, const bool depthStencil)
{
GLsizei numberOfAttachments = 0;
// At most three entries are filled below: color, depth and stencil.
GLenum attachments[8];
// Handle 0 is treated as the backbuffer, which uses GL_COLOR/GL_DEPTH/
// GL_STENCIL (or their EXT names on GLES) instead of the *_ATTACHMENT
// enums.
const bool isBackbuffer = framebuffer->GetHandle() == 0;
if (color && (framebuffer->GetAttachmentMask() & GL_COLOR_BUFFER_BIT))
{
if (isBackbuffer)
#if CONFIG2_GLES
attachments[numberOfAttachments++] = GL_COLOR_EXT;
#else
attachments[numberOfAttachments++] = GL_COLOR;
#endif
else
attachments[numberOfAttachments++] = GL_COLOR_ATTACHMENT0;
}
if (depthStencil)
{
if (isBackbuffer)
{
if (framebuffer->GetAttachmentMask() & GL_DEPTH_BUFFER_BIT)
#if CONFIG2_GLES
attachments[numberOfAttachments++] = GL_DEPTH_EXT;
#else
attachments[numberOfAttachments++] = GL_DEPTH;
#endif
if (framebuffer->GetAttachmentMask() & GL_STENCIL_BUFFER_BIT)
#if CONFIG2_GLES
attachments[numberOfAttachments++] = GL_STENCIL_EXT;
#else
attachments[numberOfAttachments++] = GL_STENCIL;
#endif
}
else
{
if (framebuffer->GetAttachmentMask() & GL_DEPTH_BUFFER_BIT)
attachments[numberOfAttachments++] = GL_DEPTH_ATTACHMENT;
if (framebuffer->GetAttachmentMask() & GL_STENCIL_BUFFER_BIT)
attachments[numberOfAttachments++] = GL_STENCIL_ATTACHMENT;
}
}
// No GL call is made when nothing was selected.
if (numberOfAttachments > 0)
{
#if CONFIG2_GLES
glDiscardFramebufferEXT(GL_FRAMEBUFFER_EXT, numberOfAttachments, attachments);
#else
glInvalidateFramebuffer(GL_FRAMEBUFFER_EXT, numberOfAttachments, attachments);
#endif
ogl_WarnIfError();
}
}
} // anonymous namespace
// Creates the GL command context, points it at the device's current
// backbuffer (all load/store ops DONT_CARE) and resets the GL state.
// static
std::unique_ptr CDeviceCommandContext::Create(CDevice* device)
{
std::unique_ptr deviceCommandContext(new CDeviceCommandContext(device));
deviceCommandContext->m_Framebuffer = device->GetCurrentBackbuffer(
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::DONT_CARE,
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::DONT_CARE)->As();
deviceCommandContext->ResetStates();
return deviceCommandContext;
}
// Initializes the cached binding state: texture unit 0 is made active with
// no 2D texture bound, and the texture, vertex attribute and buffer caches
// are cleared.
CDeviceCommandContext::CDeviceCommandContext(CDevice* device)
: m_Device(device)
{
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
// Every cached texture unit starts as "2D target, no texture".
for (BindUnit& unit : m_BoundTextures)
{
unit.target = GL_TEXTURE_2D;
unit.handle = 0;
}
for (size_t index = 0; index < m_VertexAttributeFormat.size(); ++index)
{
m_VertexAttributeFormat[index].active = false;
m_VertexAttributeFormat[index].initialized = false;
m_VertexAttributeFormat[index].bindingSlot = 0;
}
// The buffer cache is indexed by buffer type; each slot stores the GL
// target for that type and a null handle.
for (size_t index = 0; index < m_BoundBuffers.size(); ++index)
{
const CBuffer::Type type = static_cast(index);
const GLenum target = BufferTypeToGLTarget(type);
const GLuint handle = 0;
m_BoundBuffers[index].first = target;
m_BoundBuffers[index].second = handle;
}
}
// The destructor is compiler-generated.
CDeviceCommandContext::~CDeviceCommandContext() = default;
// Returns the device this context was constructed with.
IDevice* CDeviceCommandContext::GetDevice()
{
return m_Device;
}
// Applies a graphics pipeline description without forcing state that has not
// changed. A description with a shader program may only be set inside a
// framebuffer pass.
void CDeviceCommandContext::SetGraphicsPipelineState(
const SGraphicsPipelineStateDesc& pipelineState)
{
+ ENSURE(!pipelineState.shaderProgram || m_InsideFramebufferPass);
SetGraphicsPipelineStateImpl(pipelineState, false);
}
// Applies the description held by a precompiled graphics pipeline state.
// pipelineState must not be null, and a state with a shader program may only
// be set inside a framebuffer pass.
void CDeviceCommandContext::SetGraphicsPipelineState(
IGraphicsPipelineState* pipelineState)
{
// Check the pointer before the first dereference below.
ENSURE(pipelineState);
+ ENSURE(!pipelineState->GetShaderProgram() || m_InsideFramebufferPass);
SetGraphicsPipelineStateImpl(
pipelineState->As()->GetDesc(), false);
}
// Binds the shader program of a compute pipeline state. Must be called
// inside a compute pass with a non-null state.
// NOTE(review): m_ComputePipelineStateDesc is compared below but never
// assigned in this function, so unless it is updated elsewhere the cached
// program stays unchanged across calls - confirm against the compute pass
// begin/end code.
+void CDeviceCommandContext::SetComputePipelineState(
+ IComputePipelineState* pipelineState)
+{
+ ENSURE(m_InsideComputePass);
+ ENSURE(pipelineState);
+ const SComputePipelineStateDesc& desc = pipelineState->As()->GetDesc();
+ if (m_ComputePipelineStateDesc.shaderProgram != desc.shaderProgram)
+ {
+ CShaderProgram* currentShaderProgram = nullptr;
+ if (m_ComputePipelineStateDesc.shaderProgram)
+ currentShaderProgram = m_ComputePipelineStateDesc.shaderProgram->As();
+ CShaderProgram* nextShaderProgram = nullptr;
+ if (desc.shaderProgram)
+ nextShaderProgram = desc.shaderProgram->As();
+
// Bind the new program (passing the previous one), or unbind the
// previous program when the new state has none.
+ if (nextShaderProgram)
+ nextShaderProgram->Bind(currentShaderProgram);
+ else if (currentShaderProgram)
+ currentShaderProgram->Unbind();
+
+ m_ShaderProgram = nextShaderProgram;
+ }
+}
+
// Uploads a full mip level: the region starts at (0, 0) and spans the
// texture size shifted right by level, with each dimension at least 1.
void CDeviceCommandContext::UploadTexture(
	ITexture* texture, const Format format,
	const void* data, const size_t dataSize,
	const uint32_t level, const uint32_t layer)
{
	const uint32_t levelWidth = std::max(1u, texture->GetWidth() >> level);
	const uint32_t levelHeight = std::max(1u, texture->GetHeight() >> level);
	UploadTextureRegion(
		texture, format, data, dataSize,
		0, 0, levelWidth, levelHeight,
		level, layer);
}
// Uploads pixel data into a region of a 2D texture or into one face of a
// cube texture. The texture needs the TRANSFER_DST usage and the region must
// not be empty. Unsupported texture types and formats only raise a warning.
void CDeviceCommandContext::UploadTextureRegion(
ITexture* destinationTexture, const Format dataFormat,
const void* data, const size_t dataSize,
const uint32_t xOffset, const uint32_t yOffset,
const uint32_t width, const uint32_t height,
const uint32_t level, const uint32_t layer)
{
ENSURE(destinationTexture);
CTexture* texture = destinationTexture->As();
ENSURE(texture->GetUsage() & Renderer::Backend::ITexture::Usage::TRANSFER_DST);
ENSURE(width > 0 && height > 0);
if (texture->GetType() == CTexture::Type::TEXTURE_2D)
{
ENSURE(layer == 0);
// Uncompressed 8-bit formats go through glTexSubImage2D.
if (texture->GetFormat() == Format::R8G8B8A8_UNORM ||
texture->GetFormat() == Format::R8G8B8_UNORM ||
#if !CONFIG2_GLES
texture->GetFormat() == Format::R8_UNORM ||
#endif
texture->GetFormat() == Format::A8_UNORM)
{
ENSURE(texture->GetFormat() == dataFormat);
size_t bytesPerPixel = 4;
GLenum pixelFormat = GL_RGBA;
// NOTE(review): the L8_UNORM case looks unreachable, because the
// texture format is limited by the condition above and must equal
// dataFormat - confirm.
switch (dataFormat)
{
case Format::R8G8B8A8_UNORM:
break;
case Format::R8G8B8_UNORM:
pixelFormat = GL_RGB;
bytesPerPixel = 3;
break;
#if !CONFIG2_GLES
case Format::R8_UNORM:
pixelFormat = GL_RED;
bytesPerPixel = 1;
break;
#endif
case Format::A8_UNORM:
pixelFormat = GL_ALPHA;
bytesPerPixel = 1;
break;
case Format::L8_UNORM:
pixelFormat = GL_LUMINANCE;
bytesPerPixel = 1;
break;
default:
debug_warn("Unexpected format.");
break;
}
// The data must be tightly packed for the requested region.
ENSURE(dataSize == width * height * bytesPerPixel);
ScopedBind scopedBind(this, GL_TEXTURE_2D, texture->GetHandle());
glTexSubImage2D(GL_TEXTURE_2D, level,
xOffset, yOffset, width, height,
pixelFormat, GL_UNSIGNED_BYTE, data);
ogl_WarnIfError();
}
// Block-compressed formats replace the whole level, so no offset is
// allowed.
else if (
texture->GetFormat() == Format::BC1_RGB_UNORM ||
texture->GetFormat() == Format::BC1_RGBA_UNORM ||
texture->GetFormat() == Format::BC2_UNORM ||
texture->GetFormat() == Format::BC3_UNORM)
{
ENSURE(xOffset == 0 && yOffset == 0);
ENSURE(texture->GetFormat() == dataFormat);
// TODO: add data size check.
// BC1_RGB_UNORM keeps this default internal format.
GLenum internalFormat = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
switch (texture->GetFormat())
{
case Format::BC1_RGBA_UNORM:
internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
break;
case Format::BC2_UNORM:
internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
break;
case Format::BC3_UNORM:
internalFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
break;
default:
break;
}
ScopedBind scopedBind(this, GL_TEXTURE_2D, texture->GetHandle());
glCompressedTexImage2DARB(GL_TEXTURE_2D, level, internalFormat, width, height, 0, dataSize, data);
ogl_WarnIfError();
}
else
debug_warn("Unsupported format");
}
else if (texture->GetType() == CTexture::Type::TEXTURE_CUBE)
{
// Cube textures: only full-size RGBA8 uploads of level 0 are supported;
// layer selects the face.
if (texture->GetFormat() == Format::R8G8B8A8_UNORM)
{
ENSURE(texture->GetFormat() == dataFormat);
ENSURE(level == 0 && layer < 6);
ENSURE(xOffset == 0 && yOffset == 0 && texture->GetWidth() == width && texture->GetHeight() == height);
const size_t bpp = 4;
ENSURE(dataSize == width * height * bpp);
// The order of layers should be the following:
// front, back, top, bottom, right, left
static const GLenum targets[6] =
{
GL_TEXTURE_CUBE_MAP_POSITIVE_X,
GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
};
ScopedBind scopedBind(this, GL_TEXTURE_CUBE_MAP, texture->GetHandle());
glTexImage2D(targets[layer], level, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
ogl_WarnIfError();
}
else
debug_warn("Unsupported format");
}
else
debug_warn("Unsupported type");
}
// Uploads dataSize bytes from data to the beginning of the buffer.
// Must not be called inside a framebuffer pass.
void CDeviceCommandContext::UploadBuffer(IBuffer* buffer, const void* data, const uint32_t dataSize)
{
	ENSURE(!m_InsideFramebufferPass);
	// UploadBufferRegion takes (buffer, data, dataOffset, dataSize): the upload
	// starts at offset 0 and covers dataSize bytes.
	UploadBufferRegion(buffer, data, 0, dataSize);
}
// Fills the whole buffer (offset 0, size GetSize()) through uploadFunction.
// Must not be called inside a framebuffer pass.
void CDeviceCommandContext::UploadBuffer(
IBuffer* buffer, const UploadBufferFunction& uploadFunction)
{
ENSURE(!m_InsideFramebufferPass);
UploadBufferRegion(buffer, 0, buffer->GetSize(), uploadFunction);
}
// Copies dataSize bytes from data into the buffer at dataOffset. The region
// must fit into the buffer and the call must be outside a framebuffer pass.
void CDeviceCommandContext::UploadBufferRegion(
IBuffer* buffer, const void* data, const uint32_t dataOffset, const uint32_t dataSize)
{
ENSURE(!m_InsideFramebufferPass);
ENSURE(data);
ENSURE(dataOffset + dataSize <= buffer->GetSize());
const GLenum target = BufferTypeToGLTarget(buffer->GetType());
ScopedBufferBind scopedBufferBind(this, buffer->As());
if (buffer->IsDynamic())
{
// Dynamic buffers are re-specified and mapped; the helper already
// offsets the mapped pointer by dataOffset before calling the lambda.
UploadDynamicBufferRegionImpl(target, buffer->GetSize(), dataOffset, dataSize, [data, dataSize](u8* mappedData)
{
std::memcpy(mappedData, data, dataSize);
});
}
else
{
// Other buffers are updated in place.
glBufferSubDataARB(target, dataOffset, dataSize, data);
ogl_WarnIfError();
}
}
// Fills a region of a dynamic buffer through uploadFunction. The region must
// fit into the buffer, the buffer must be dynamic and the call must be
// outside a framebuffer pass.
void CDeviceCommandContext::UploadBufferRegion(
IBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
const UploadBufferFunction& uploadFunction)
{
ENSURE(!m_InsideFramebufferPass);
ENSURE(dataOffset + dataSize <= buffer->GetSize());
const GLenum target = BufferTypeToGLTarget(buffer->GetType());
ScopedBufferBind scopedBufferBind(this, buffer->As());
ENSURE(buffer->IsDynamic());
UploadDynamicBufferRegionImpl(target, buffer->GetSize(), dataOffset, dataSize, uploadFunction);
}
// Opens a GL debug group named name and tracks the nesting depth. Does
// nothing when the device does not support scoped debug labels.
void CDeviceCommandContext::BeginScopedLabel(const char* name)
{
	if (m_Device->GetCapabilities().debugScopedLabels)
	{
		++m_ScopedLabelDepth;
		glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0x0AD, -1, name);
	}
}
// Closes the innermost GL debug group. Does nothing when the device does not
// support scoped debug labels; otherwise a group must be open.
void CDeviceCommandContext::EndScopedLabel()
{
	if (m_Device->GetCapabilities().debugScopedLabels)
	{
		ENSURE(m_ScopedLabelDepth > 0);
		--m_ScopedLabelDepth;
		glPopDebugGroup();
	}
}
// Binds a texture handle to a target on the given texture unit, using the
// cached per-unit state to skip redundant GL calls.
void CDeviceCommandContext::BindTexture(
const uint32_t unit, const GLenum target, const GLuint handle)
{
ENSURE(unit < m_BoundTextures.size());
#if CONFIG2_GLES
ENSURE(target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP);
#else
ENSURE(target == GL_TEXTURE_2D || target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_2D_MULTISAMPLE);
#endif
// The active unit is switched even if the binding turns out to be cached.
if (m_ActiveTextureUnit != unit)
{
glActiveTexture(GL_TEXTURE0 + unit);
m_ActiveTextureUnit = unit;
}
if (m_BoundTextures[unit].target == target && m_BoundTextures[unit].handle == handle)
return;
// When the target changes, the texture bound to the old target is
// unbound first.
if (m_BoundTextures[unit].target != target && m_BoundTextures[unit].target && m_BoundTextures[unit].handle)
glBindTexture(m_BoundTextures[unit].target, 0);
if (m_BoundTextures[unit].handle != handle)
glBindTexture(target, handle);
ogl_WarnIfError();
m_BoundTextures[unit] = {target, handle};
}
// Binds buffer (or unbinds when it is null) to the GL target of the given
// type and records the handle in the per-type cache.
void CDeviceCommandContext::BindBuffer(const IBuffer::Type type, CBuffer* buffer)
{
ENSURE(!buffer || buffer->GetType() == type);
if (type == IBuffer::Type::VERTEX)
{
// Vertex buffer binds are skipped when the buffer is unchanged.
if (m_VertexBuffer == buffer)
return;
m_VertexBuffer = buffer;
}
else if (type == IBuffer::Type::INDEX)
{
// Index buffer binds are never skipped. m_IndexBuffer is only cleared
// on unbind, m_IndexBufferData is always cleared.
if (!buffer)
m_IndexBuffer = nullptr;
m_IndexBufferData = nullptr;
}
const GLenum target = BufferTypeToGLTarget(type);
const GLuint handle = buffer ? buffer->GetHandle() : 0;
glBindBufferARB(target, handle);
ogl_WarnIfError();
const size_t cacheIndex = static_cast(type);
ENSURE(cacheIndex < m_BoundBuffers.size());
m_BoundBuffers[cacheIndex].second = handle;
}
// Unbinds the texture from every texture unit whose cached handle matches it.
void CDeviceCommandContext::OnTextureDestroy(CTexture* texture)
{
	ENSURE(texture);
	for (size_t unit = 0; unit < m_BoundTextures.size(); ++unit)
	{
		const bool boundToUnit = m_BoundTextures[unit].handle == texture->GetHandle();
		if (!boundToUnit)
			continue;
		BindTexture(unit, GL_TEXTURE_2D, 0);
	}
}
// Resets the GL state and drops all cached texture and buffer bindings.
// Must be called with no open scoped label and outside framebuffer and
// compute passes.
void CDeviceCommandContext::Flush()
{
ENSURE(m_ScopedLabelDepth == 0);
+ ENSURE(!m_InsideFramebufferPass);
+ ENSURE(!m_InsideComputePass);
GPU_SCOPED_LABEL(this, "CDeviceCommandContext::Flush");
ResetStates();
m_IndexBuffer = nullptr;
m_IndexBufferData = nullptr;
// Unbind every texture unit that still has a texture.
for (size_t unit = 0; unit < m_BoundTextures.size(); ++unit)
{
if (m_BoundTextures[unit].handle)
BindTexture(unit, GL_TEXTURE_2D, 0);
}
BindBuffer(CBuffer::Type::INDEX, nullptr);
BindBuffer(CBuffer::Type::VERTEX, nullptr);
}
// Forces the default graphics pipeline state, resets the scissors and
// rebinds the device's current backbuffer as the framebuffer.
void CDeviceCommandContext::ResetStates()
{
// force = true applies every state even if the cached value matches.
SetGraphicsPipelineStateImpl(MakeDefaultGraphicsPipelineStateDesc(), true);
SetScissors(0, nullptr);
m_Framebuffer = m_Device->GetCurrentBackbuffer(
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::DONT_CARE,
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::DONT_CARE)->As();
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_Framebuffer->GetHandle());
ogl_WarnIfError();
}
// Applies pipelineStateDesc to the GL context and caches it in
// m_GraphicsPipelineStateDesc. Each group of GL state is only re-sent when it
// differs from the cached description, or unconditionally when `force` is
// true. Note that the shader program is (re)bound only when the program
// pointer changes; `force` does not affect it. Must not be called inside a
// pass.
void CDeviceCommandContext::SetGraphicsPipelineStateImpl(
const SGraphicsPipelineStateDesc& pipelineStateDesc, const bool force)
{
ENSURE(!m_InsidePass);
// --- Shader program ---
if (m_GraphicsPipelineStateDesc.shaderProgram != pipelineStateDesc.shaderProgram)
{
// NOTE(review): the static_cast / numeric_limits template arguments below
// appear to have been stripped from this listing - confirm against the repository.
CShaderProgram* currentShaderProgram = nullptr;
if (m_GraphicsPipelineStateDesc.shaderProgram)
{
currentShaderProgram =
static_cast(m_GraphicsPipelineStateDesc.shaderProgram);
}
CShaderProgram* nextShaderProgram = nullptr;
if (pipelineStateDesc.shaderProgram)
{
nextShaderProgram =
static_cast(pipelineStateDesc.shaderProgram);
// A new program invalidates all previously configured vertex attributes:
// record which streams it uses and mark every format as not yet set up.
for (size_t index = 0; index < m_VertexAttributeFormat.size(); ++index)
{
const VertexAttributeStream stream = static_cast(index);
m_VertexAttributeFormat[index].active = nextShaderProgram->IsStreamActive(stream);
m_VertexAttributeFormat[index].initialized = false;
m_VertexAttributeFormat[index].bindingSlot = std::numeric_limits::max();
}
}
if (nextShaderProgram)
nextShaderProgram->Bind(currentShaderProgram);
else if (currentShaderProgram)
currentShaderProgram->Unbind();
m_ShaderProgram = nextShaderProgram;
}
// --- Depth/stencil state ---
const SDepthStencilStateDesc& currentDepthStencilStateDesc = m_GraphicsPipelineStateDesc.depthStencilState;
const SDepthStencilStateDesc& nextDepthStencilStateDesc = pipelineStateDesc.depthStencilState;
if (force || currentDepthStencilStateDesc.depthTestEnabled != nextDepthStencilStateDesc.depthTestEnabled)
{
if (nextDepthStencilStateDesc.depthTestEnabled)
glEnable(GL_DEPTH_TEST);
else
glDisable(GL_DEPTH_TEST);
}
if (force || currentDepthStencilStateDesc.depthCompareOp != nextDepthStencilStateDesc.depthCompareOp)
{
glDepthFunc(Mapping::FromCompareOp(nextDepthStencilStateDesc.depthCompareOp));
}
if (force || currentDepthStencilStateDesc.depthWriteEnabled != nextDepthStencilStateDesc.depthWriteEnabled)
{
ApplyDepthMask(nextDepthStencilStateDesc.depthWriteEnabled);
}
if (force || currentDepthStencilStateDesc.stencilTestEnabled != nextDepthStencilStateDesc.stencilTestEnabled)
{
if (nextDepthStencilStateDesc.stencilTestEnabled)
glEnable(GL_STENCIL_TEST);
else
glDisable(GL_STENCIL_TEST);
}
if (force ||
currentDepthStencilStateDesc.stencilFrontFace != nextDepthStencilStateDesc.stencilFrontFace ||
currentDepthStencilStateDesc.stencilBackFace != nextDepthStencilStateDesc.stencilBackFace)
{
// Identical front/back descriptions need a single call; otherwise only the
// face(s) that actually changed are updated.
if (nextDepthStencilStateDesc.stencilFrontFace == nextDepthStencilStateDesc.stencilBackFace)
{
glStencilOp(
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.failOp),
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.depthFailOp),
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.passOp));
}
else
{
if (force || currentDepthStencilStateDesc.stencilFrontFace != nextDepthStencilStateDesc.stencilFrontFace)
{
glStencilOpSeparate(
GL_FRONT,
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.failOp),
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.depthFailOp),
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilFrontFace.passOp));
}
if (force || currentDepthStencilStateDesc.stencilBackFace != nextDepthStencilStateDesc.stencilBackFace)
{
glStencilOpSeparate(
GL_BACK,
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.failOp),
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.depthFailOp),
Mapping::FromStencilOp(nextDepthStencilStateDesc.stencilBackFace.passOp));
}
}
}
if (force || currentDepthStencilStateDesc.stencilWriteMask != nextDepthStencilStateDesc.stencilWriteMask)
{
ApplyStencilMask(nextDepthStencilStateDesc.stencilWriteMask);
}
// The stencil function combines compare op, reference and read mask, so a
// change to any of them requires re-issuing it.
if (force ||
currentDepthStencilStateDesc.stencilReference != nextDepthStencilStateDesc.stencilReference ||
currentDepthStencilStateDesc.stencilReadMask != nextDepthStencilStateDesc.stencilReadMask ||
currentDepthStencilStateDesc.stencilFrontFace.compareOp != nextDepthStencilStateDesc.stencilFrontFace.compareOp ||
currentDepthStencilStateDesc.stencilBackFace.compareOp != nextDepthStencilStateDesc.stencilBackFace.compareOp)
{
if (nextDepthStencilStateDesc.stencilFrontFace.compareOp == nextDepthStencilStateDesc.stencilBackFace.compareOp)
{
glStencilFunc(
Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilFrontFace.compareOp),
nextDepthStencilStateDesc.stencilReference,
nextDepthStencilStateDesc.stencilReadMask);
}
else
{
glStencilFuncSeparate(GL_FRONT,
Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilFrontFace.compareOp),
nextDepthStencilStateDesc.stencilReference,
nextDepthStencilStateDesc.stencilReadMask);
glStencilFuncSeparate(GL_BACK,
Mapping::FromCompareOp(nextDepthStencilStateDesc.stencilBackFace.compareOp),
nextDepthStencilStateDesc.stencilReference,
nextDepthStencilStateDesc.stencilReadMask);
}
}
// --- Blend state ---
const SBlendStateDesc& currentBlendStateDesc = m_GraphicsPipelineStateDesc.blendState;
const SBlendStateDesc& nextBlendStateDesc = pipelineStateDesc.blendState;
if (force || currentBlendStateDesc.enabled != nextBlendStateDesc.enabled)
{
if (nextBlendStateDesc.enabled)
glEnable(GL_BLEND);
else
glDisable(GL_BLEND);
}
if (force ||
currentBlendStateDesc.srcColorBlendFactor != nextBlendStateDesc.srcColorBlendFactor ||
currentBlendStateDesc.srcAlphaBlendFactor != nextBlendStateDesc.srcAlphaBlendFactor ||
currentBlendStateDesc.dstColorBlendFactor != nextBlendStateDesc.dstColorBlendFactor ||
currentBlendStateDesc.dstAlphaBlendFactor != nextBlendStateDesc.dstAlphaBlendFactor)
{
// Use the combined call when color and alpha share the same factors.
if (nextBlendStateDesc.srcColorBlendFactor == nextBlendStateDesc.srcAlphaBlendFactor &&
nextBlendStateDesc.dstColorBlendFactor == nextBlendStateDesc.dstAlphaBlendFactor)
{
glBlendFunc(
Mapping::FromBlendFactor(nextBlendStateDesc.srcColorBlendFactor),
Mapping::FromBlendFactor(nextBlendStateDesc.dstColorBlendFactor));
}
else
{
glBlendFuncSeparate(
Mapping::FromBlendFactor(nextBlendStateDesc.srcColorBlendFactor),
Mapping::FromBlendFactor(nextBlendStateDesc.dstColorBlendFactor),
Mapping::FromBlendFactor(nextBlendStateDesc.srcAlphaBlendFactor),
Mapping::FromBlendFactor(nextBlendStateDesc.dstAlphaBlendFactor));
}
}
if (force ||
currentBlendStateDesc.colorBlendOp != nextBlendStateDesc.colorBlendOp ||
currentBlendStateDesc.alphaBlendOp != nextBlendStateDesc.alphaBlendOp)
{
if (nextBlendStateDesc.colorBlendOp == nextBlendStateDesc.alphaBlendOp)
{
glBlendEquation(Mapping::FromBlendOp(nextBlendStateDesc.colorBlendOp));
}
else
{
glBlendEquationSeparate(
Mapping::FromBlendOp(nextBlendStateDesc.colorBlendOp),
Mapping::FromBlendOp(nextBlendStateDesc.alphaBlendOp));
}
}
if (force ||
currentBlendStateDesc.constant != nextBlendStateDesc.constant)
{
glBlendColor(
nextBlendStateDesc.constant.r,
nextBlendStateDesc.constant.g,
nextBlendStateDesc.constant.b,
nextBlendStateDesc.constant.a);
}
if (force ||
currentBlendStateDesc.colorWriteMask != nextBlendStateDesc.colorWriteMask)
{
ApplyColorMask(nextBlendStateDesc.colorWriteMask);
}
// --- Rasterization state ---
const SRasterizationStateDesc& currentRasterizationStateDesc =
m_GraphicsPipelineStateDesc.rasterizationState;
const SRasterizationStateDesc& nextRasterizationStateDesc =
pipelineStateDesc.rasterizationState;
if (force ||
currentRasterizationStateDesc.polygonMode != nextRasterizationStateDesc.polygonMode)
{
// The polygon mode is not applied in GLES builds.
#if !CONFIG2_GLES
glPolygonMode(
GL_FRONT_AND_BACK,
nextRasterizationStateDesc.polygonMode == PolygonMode::LINE ? GL_LINE : GL_FILL);
#endif
}
if (force ||
currentRasterizationStateDesc.cullMode != nextRasterizationStateDesc.cullMode)
{
if (nextRasterizationStateDesc.cullMode == CullMode::NONE)
{
glDisable(GL_CULL_FACE);
}
else
{
// Culling only needs enabling when it was previously off (or when forced).
if (force || currentRasterizationStateDesc.cullMode == CullMode::NONE)
glEnable(GL_CULL_FACE);
glCullFace(nextRasterizationStateDesc.cullMode == CullMode::FRONT ? GL_FRONT : GL_BACK);
}
}
if (force ||
currentRasterizationStateDesc.frontFace != nextRasterizationStateDesc.frontFace)
{
if (nextRasterizationStateDesc.frontFace == FrontFace::CLOCKWISE)
glFrontFace(GL_CW);
else
glFrontFace(GL_CCW);
}
// Depth bias is not applied in GLES builds.
#if !CONFIG2_GLES
if (force ||
currentRasterizationStateDesc.depthBiasEnabled != nextRasterizationStateDesc.depthBiasEnabled)
{
if (nextRasterizationStateDesc.depthBiasEnabled)
glEnable(GL_POLYGON_OFFSET_FILL);
else
glDisable(GL_POLYGON_OFFSET_FILL);
}
if (force ||
currentRasterizationStateDesc.depthBiasConstantFactor != nextRasterizationStateDesc.depthBiasConstantFactor ||
currentRasterizationStateDesc.depthBiasSlopeFactor != nextRasterizationStateDesc.depthBiasSlopeFactor)
{
// glPolygonOffset takes the slope-scaled factor first, then the constant units.
glPolygonOffset(
nextRasterizationStateDesc.depthBiasSlopeFactor,
nextRasterizationStateDesc.depthBiasConstantFactor);
}
#endif
ogl_WarnIfError();
// Remember what was applied so the next call can diff against it.
m_GraphicsPipelineStateDesc = pipelineStateDesc;
}
// Copies sourceRegion of srcFramebuffer into destinationRegion of
// dstFramebuffer, for every attachment type both framebuffers have.
//
// @param srcFramebuffer Framebuffer to read from; must not be the backbuffer
//        (handle 0) and must differ from dstFramebuffer.
// @param dstFramebuffer Framebuffer to write to.
// @param sourceRegion Rectangle (x, y, width, height) to read.
// @param destinationRegion Rectangle (x, y, width, height) to write.
// @param filter LINEAR selects GL_LINEAR, anything else GL_NEAREST.
//
// Must be called outside of a framebuffer pass. Not implemented for GLES.
void CDeviceCommandContext::BlitFramebuffer(
	IFramebuffer* srcFramebuffer, IFramebuffer* dstFramebuffer,
	const Rect& sourceRegion, const Rect& destinationRegion,
	const Sampler::Filter filter)
{
	ENSURE(!m_InsideFramebufferPass);
	CFramebuffer* destinationFramebuffer = dstFramebuffer->As<CFramebuffer>();
	CFramebuffer* sourceFramebuffer = srcFramebuffer->As<CFramebuffer>();
#if CONFIG2_GLES
	UNUSED2(destinationFramebuffer);
	UNUSED2(sourceFramebuffer);
	UNUSED2(destinationRegion);
	UNUSED2(sourceRegion);
	UNUSED2(filter);
	debug_warn("CDeviceCommandContext::BlitFramebuffer is not implemented for GLES");
#else
	// Source framebuffer should not be backbuffer.
	ENSURE(sourceFramebuffer->GetHandle() != 0);
	ENSURE(destinationFramebuffer != sourceFramebuffer);
	glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, sourceFramebuffer->GetHandle());
	glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, destinationFramebuffer->GetHandle());
	// TODO: add more check for internal formats.
	// glBlitFramebuffer expects two corners (x0, y0, x1, y1) per rectangle, not
	// an origin plus a size, so the far corner has to be computed explicitly.
	// For regions anchored at (0, 0) this is the same as passing width/height.
	glBlitFramebufferEXT(
		sourceRegion.x, sourceRegion.y,
		sourceRegion.x + sourceRegion.width, sourceRegion.y + sourceRegion.height,
		destinationRegion.x, destinationRegion.y,
		destinationRegion.x + destinationRegion.width, destinationRegion.y + destinationRegion.height,
		(sourceFramebuffer->GetAttachmentMask() & destinationFramebuffer->GetAttachmentMask()),
		filter == Sampler::Filter::LINEAR ? GL_LINEAR : GL_NEAREST);
	ogl_WarnIfError();
#endif
}
// Blits the whole of srcFramebuffer onto dstFramebuffer with nearest
// filtering, for every attachment type both framebuffers have. Both
// framebuffers must have the same width and height, the source must not be the
// backbuffer, and the call must happen outside of a framebuffer pass.
// Not implemented for GLES.
void CDeviceCommandContext::ResolveFramebuffer(
IFramebuffer* srcFramebuffer, IFramebuffer* dstFramebuffer)
{
ENSURE(!m_InsideFramebufferPass);
// NOTE(review): As() appears to have lost its template argument in this listing - confirm against the repository.
CFramebuffer* destinationFramebuffer = dstFramebuffer->As();
CFramebuffer* sourceFramebuffer = srcFramebuffer->As();
ENSURE(destinationFramebuffer->GetWidth() == sourceFramebuffer->GetWidth());
ENSURE(destinationFramebuffer->GetHeight() == sourceFramebuffer->GetHeight());
#if CONFIG2_GLES
UNUSED2(destinationFramebuffer);
UNUSED2(sourceFramebuffer);
debug_warn("CDeviceCommandContext::ResolveFramebuffer is not implemented for GLES");
#else
// Source framebuffer should not be backbuffer.
ENSURE(sourceFramebuffer->GetHandle() != 0);
ENSURE(destinationFramebuffer != sourceFramebuffer);
glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, sourceFramebuffer->GetHandle());
glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, destinationFramebuffer->GetHandle());
// Both rectangles start at the origin, so the far corner equals the size.
glBlitFramebufferEXT(
0, 0, sourceFramebuffer->GetWidth(), sourceFramebuffer->GetHeight(),
0, 0, sourceFramebuffer->GetWidth(), sourceFramebuffer->GetHeight(),
(sourceFramebuffer->GetAttachmentMask() & destinationFramebuffer->GetAttachmentMask()),
GL_NEAREST);
ogl_WarnIfError();
#endif
}
// Clears the requested attachments of the current framebuffer. A requested
// attachment is skipped when the framebuffer's attachment mask does not
// contain it. Must be called inside a framebuffer pass.
void CDeviceCommandContext::ClearFramebuffer(const bool color, const bool depth, const bool stencil)
{
ENSURE(m_InsideFramebufferPass);
const bool needsColor = color && (m_Framebuffer->GetAttachmentMask() & GL_COLOR_BUFFER_BIT) != 0;
const bool needsDepth = depth && (m_Framebuffer->GetAttachmentMask() & GL_DEPTH_BUFFER_BIT) != 0;
const bool needsStencil = stencil && (m_Framebuffer->GetAttachmentMask() & GL_STENCIL_BUFFER_BIT) != 0;
GLbitfield mask = 0;
// Write masks are opened up before the clear and restored from the cached
// pipeline state afterwards.
if (needsColor)
{
ApplyColorMask(ColorWriteMask::RED | ColorWriteMask::GREEN | ColorWriteMask::BLUE | ColorWriteMask::ALPHA);
glClearColor(
m_Framebuffer->GetClearColor().r,
m_Framebuffer->GetClearColor().g,
m_Framebuffer->GetClearColor().b,
m_Framebuffer->GetClearColor().a);
mask |= GL_COLOR_BUFFER_BIT;
}
if (needsDepth)
{
ApplyDepthMask(true);
mask |= GL_DEPTH_BUFFER_BIT;
}
if (needsStencil)
{
// NOTE(review): numeric_limits appears to have lost its template argument in this listing - confirm against the repository.
ApplyStencilMask(std::numeric_limits::max());
mask |= GL_STENCIL_BUFFER_BIT;
}
glClear(mask);
ogl_WarnIfError();
if (needsColor)
ApplyColorMask(m_GraphicsPipelineStateDesc.blendState.colorWriteMask);
if (needsDepth)
ApplyDepthMask(m_GraphicsPipelineStateDesc.depthStencilState.depthWriteEnabled);
if (needsStencil)
ApplyStencilMask(m_GraphicsPipelineStateDesc.depthStencilState.stencilWriteMask);
}
// Starts a framebuffer pass on `framebuffer`: applies the default pipeline
// state (non-forced), binds the framebuffer and honours its load operations
// (optional invalidation, then clearing of CLEAR attachments).
// Framebuffer passes must not be nested.
void CDeviceCommandContext::BeginFramebufferPass(IFramebuffer* framebuffer)
{
SetGraphicsPipelineStateImpl(
MakeDefaultGraphicsPipelineStateDesc(), false);
ENSURE(!m_InsideFramebufferPass);
m_InsideFramebufferPass = true;
ENSURE(framebuffer);
// NOTE(review): As() appears to have lost its template argument in this listing - confirm against the repository.
m_Framebuffer = framebuffer->As();
// Only a framebuffer with handle 0 is allowed to report a zero size.
ENSURE(m_Framebuffer->GetHandle() == 0 || (m_Framebuffer->GetWidth() > 0 && m_Framebuffer->GetHeight() > 0));
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_Framebuffer->GetHandle());
ogl_WarnIfError();
if (m_Device->UseFramebufferInvalidating())
{
// Attachments whose previous contents are not loaded may be invalidated.
InvalidateFramebuffer(
m_Framebuffer,
m_Framebuffer->GetColorAttachmentLoadOp() != AttachmentLoadOp::LOAD,
m_Framebuffer->GetDepthStencilAttachmentLoadOp() != AttachmentLoadOp::LOAD);
}
const bool needsClearColor =
m_Framebuffer->GetColorAttachmentLoadOp() == AttachmentLoadOp::CLEAR;
const bool needsClearDepthStencil =
m_Framebuffer->GetDepthStencilAttachmentLoadOp() == AttachmentLoadOp::CLEAR;
if (needsClearColor || needsClearDepthStencil)
{
// Depth and stencil share one load op, so they are cleared together.
ClearFramebuffer(
needsClearColor, needsClearDepthStencil, needsClearDepthStencil);
}
}
// Ends the current framebuffer pass: honours the store operations (optional
// invalidation of attachments that are not stored), switches back to the
// current backbuffer and re-applies the default pipeline state (non-forced).
void CDeviceCommandContext::EndFramebufferPass()
{
if (m_Device->UseFramebufferInvalidating())
{
InvalidateFramebuffer(
m_Framebuffer,
m_Framebuffer->GetColorAttachmentStoreOp() != AttachmentStoreOp::STORE,
m_Framebuffer->GetDepthStencilAttachmentStoreOp() != AttachmentStoreOp::STORE);
}
ENSURE(m_InsideFramebufferPass);
m_InsideFramebufferPass = false;
// NOTE(review): As() appears to have lost its template argument in this listing - confirm against the repository.
CFramebuffer* framebuffer = m_Device->GetCurrentBackbuffer(
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::DONT_CARE,
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::DONT_CARE)->As();
// Skip the rebind when the pass already rendered to the same GL framebuffer.
if (framebuffer->GetHandle() != m_Framebuffer->GetHandle())
{
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer->GetHandle());
ogl_WarnIfError();
}
m_Framebuffer = framebuffer;
+
+ SetGraphicsPipelineStateImpl(MakeDefaultGraphicsPipelineStateDesc(), false); // drops the pass's pipeline state (including its shader program) once the pass is over
}
// Reads the (x, y, width, height) rectangle of pixels via glReadPixels into
// `data` as GL_RGB / GL_UNSIGNED_BYTE.
// NOTE(review): the caller presumably has to size `data` for 3 bytes per pixel
// plus any row padding implied by the current GL pack alignment - confirm.
void CDeviceCommandContext::ReadbackFramebufferSync(
const uint32_t x, const uint32_t y, const uint32_t width, const uint32_t height,
void* data)
{
ENSURE(m_Framebuffer);
glReadPixels(x, y, width, height, GL_RGB, GL_UNSIGNED_BYTE, data);
ogl_WarnIfError();
}
void CDeviceCommandContext::SetScissors(const uint32_t scissorCount, const Rect* scissors)
{
ENSURE(scissorCount <= 1);
if (scissorCount == 0)
{
if (m_ScissorCount != scissorCount)
glDisable(GL_SCISSOR_TEST);
}
else
{
if (m_ScissorCount != scissorCount)
glEnable(GL_SCISSOR_TEST);
ENSURE(scissors);
if (m_ScissorCount != scissorCount || m_Scissors[0] != scissors[0])
{
m_Scissors[0] = scissors[0];
glScissor(m_Scissors[0].x, m_Scissors[0].y, m_Scissors[0].width, m_Scissors[0].height);
}
}
ogl_WarnIfError();
m_ScissorCount = scissorCount;
}
// Sets the GL viewport from viewports[0]. Exactly one viewport is supported
// and the call is only valid inside a framebuffer pass.
void CDeviceCommandContext::SetViewports(const uint32_t viewportCount, const Rect* viewports)
{
ENSURE(m_InsideFramebufferPass);
ENSURE(viewportCount == 1);
glViewport(viewports[0].x, viewports[0].y, viewports[0].width, viewports[0].height);
ogl_WarnIfError();
}
// Records the format, offset, stride, rate and binding slot of every attribute
// in vertexInputLayout whose stream is active for the current shader program.
// Nothing is sent to GL here; SetVertexBuffer / SetVertexBufferData consume
// the recorded formats.
void CDeviceCommandContext::SetVertexInputLayout(
IVertexInputLayout* vertexInputLayout)
{
ENSURE(vertexInputLayout);
// NOTE(review): As() and static_cast appear to have lost their template arguments in this listing - confirm against the repository.
for (const SVertexAttributeFormat& attribute : vertexInputLayout->As()->GetAttributes())
{
const uint32_t index = static_cast(attribute.stream);
ENSURE(index < m_VertexAttributeFormat.size());
ENSURE(attribute.bindingSlot < m_VertexAttributeFormat.size());
// Streams the bound program does not use are ignored.
if (!m_VertexAttributeFormat[index].active)
continue;
m_VertexAttributeFormat[index].format = attribute.format;
m_VertexAttributeFormat[index].offset = attribute.offset;
m_VertexAttributeFormat[index].stride = attribute.stride;
m_VertexAttributeFormat[index].rate = attribute.rate;
m_VertexAttributeFormat[index].bindingSlot = attribute.bindingSlot;
m_VertexAttributeFormat[index].initialized = true;
}
}
// Binds `buffer` as the vertex buffer and points every active attribute that
// was assigned to bindingSlot at it. `offset` is added to each attribute's own
// offset. Requires a bound shader program and a vertex-typed buffer.
void CDeviceCommandContext::SetVertexBuffer(
const uint32_t bindingSlot, IBuffer* buffer, const uint32_t offset)
{
ENSURE(buffer);
ENSURE(buffer->GetType() == IBuffer::Type::VERTEX);
ENSURE(m_ShaderProgram);
// NOTE(review): As() and static_cast appear to have lost their template arguments in this listing - confirm against the repository.
BindBuffer(buffer->GetType(), buffer->As());
for (size_t index = 0; index < m_VertexAttributeFormat.size(); ++index)
{
if (!m_VertexAttributeFormat[index].active || m_VertexAttributeFormat[index].bindingSlot != bindingSlot)
continue;
// The layout must have been supplied via SetVertexInputLayout first.
ENSURE(m_VertexAttributeFormat[index].initialized);
const VertexAttributeStream stream = static_cast(index);
// A null data pointer is passed because the data comes from the bound buffer.
m_ShaderProgram->VertexAttribPointer(stream,
m_VertexAttributeFormat[index].format,
m_VertexAttributeFormat[index].offset + offset,
m_VertexAttributeFormat[index].stride,
m_VertexAttributeFormat[index].rate,
nullptr);
}
}
// Sources every active attribute assigned to bindingSlot from client memory
// (`data`, `dataSize` bytes) instead of a buffer object; any bound vertex
// buffer is unbound first. Requires a bound shader program.
void CDeviceCommandContext::SetVertexBufferData(
const uint32_t bindingSlot, const void* data, const uint32_t dataSize)
{
ENSURE(data);
ENSURE(m_ShaderProgram);
ENSURE(dataSize > 0);
BindBuffer(CBuffer::Type::VERTEX, nullptr);
for (size_t index = 0; index < m_VertexAttributeFormat.size(); ++index)
{
if (!m_VertexAttributeFormat[index].active || m_VertexAttributeFormat[index].bindingSlot != bindingSlot)
continue;
ENSURE(m_VertexAttributeFormat[index].initialized);
// NOTE(review): static_cast appears to have lost its template argument in this listing - confirm against the repository.
const VertexAttributeStream stream = static_cast(index);
// We don't know how many vertices will be used in a draw command, so we
// assume at least one vertex.
ENSURE(dataSize >= m_VertexAttributeFormat[index].offset + m_VertexAttributeFormat[index].stride);
m_ShaderProgram->VertexAttribPointer(stream,
m_VertexAttributeFormat[index].format,
m_VertexAttributeFormat[index].offset,
m_VertexAttributeFormat[index].stride,
m_VertexAttributeFormat[index].rate,
data);
}
}
// Makes `buffer` (which must be an index buffer) the source of indices for
// indexed draws and drops any client-memory index data set earlier.
void CDeviceCommandContext::SetIndexBuffer(IBuffer* buffer)
{
ENSURE(buffer->GetType() == CBuffer::Type::INDEX);
// NOTE(review): As() appears to have lost its template argument in this listing - confirm against the repository.
m_IndexBuffer = buffer->As();
m_IndexBufferData = nullptr;
BindBuffer(CBuffer::Type::INDEX, m_IndexBuffer);
}
// Makes client memory the source of indices for indexed draws.
//
// @param data Pointer to the index data; must not be null.
// @param dataSize Size of the index data in bytes; must be non-zero.
//
// Any index buffer object set earlier is unbound, since the draw functions
// would otherwise treat `data` as an offset into that buffer.
void CDeviceCommandContext::SetIndexBufferData(const void* data, const uint32_t dataSize)
{
	// Reject a null pointer here, as SetVertexBufferData does, rather than
	// leaving it to be noticed only at draw time.
	ENSURE(data);
	ENSURE(dataSize > 0);
	if (m_IndexBuffer)
	{
		BindBuffer(CBuffer::Type::INDEX, nullptr);
		m_IndexBuffer = nullptr;
	}
	m_IndexBufferData = data;
}
// Marks the start of a pass. Draw calls are only accepted while a pass is
// open, and pipeline state cannot be changed inside one. Passes cannot nest.
void CDeviceCommandContext::BeginPass()
{
ENSURE(!m_InsidePass);
m_InsidePass = true;
}
// Marks the end of the pass opened by BeginPass.
void CDeviceCommandContext::EndPass()
{
ENSURE(m_InsidePass);
m_InsidePass = false;
}
// Draws vertexCount vertices, starting at firstVertex, as a triangle list.
// Requires a bound shader program and an open pass.
void CDeviceCommandContext::Draw(
	const uint32_t firstVertex, const uint32_t vertexCount)
{
	ENSURE(m_ShaderProgram);
	ENSURE(m_InsidePass);
	// Some drivers apparently don't like count = 0 in glDrawArrays, so the
	// call is only issued when there is something to draw.
	if (vertexCount != 0)
	{
		m_ShaderProgram->AssertPointersBound();
		glDrawArrays(GL_TRIANGLES, firstVertex, vertexCount);
		ogl_WarnIfError();
	}
}
// Draws indexCount 16-bit indices, starting at firstIndex, as a triangle list.
// Indices come from the index buffer or the client data set beforehand.
// Only vertexOffset == 0 is supported. Does nothing when indexCount is zero.
void CDeviceCommandContext::DrawIndexed(
const uint32_t firstIndex, const uint32_t indexCount, const int32_t vertexOffset)
{
ENSURE(m_ShaderProgram);
ENSURE(m_InsidePass);
if (indexCount == 0)
return;
ENSURE(m_IndexBuffer || m_IndexBufferData);
ENSURE(vertexOffset == 0);
if (m_IndexBuffer)
{
// The requested range must lie inside the buffer object.
ENSURE(sizeof(uint16_t) * (firstIndex + indexCount) <= m_IndexBuffer->GetSize());
}
m_ShaderProgram->AssertPointersBound();
// Don't use glMultiDrawElements here since it doesn't have a significant
// performance impact and it suffers from various driver bugs (e.g. it breaks
// in Mesa 7.10 swrast with index VBOs).
// m_IndexBufferData is null when an index buffer is set, so the pointer
// below is then just the byte offset of firstIndex.
// NOTE(review): static_cast appears to have lost its template arguments in this listing - confirm against the repository.
glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT,
static_cast((static_cast(m_IndexBufferData) + sizeof(uint16_t) * firstIndex)));
ogl_WarnIfError();
}
// Draws instanceCount instances of vertexCount vertices as triangle lists.
// Requires the instancing capability; only firstInstance == 0 is supported.
// Does nothing when either count is zero.
void CDeviceCommandContext::DrawInstanced(
const uint32_t firstVertex, const uint32_t vertexCount,
const uint32_t firstInstance, const uint32_t instanceCount)
{
ENSURE(m_Device->GetCapabilities().instancing);
ENSURE(m_ShaderProgram);
ENSURE(m_InsidePass);
if (vertexCount == 0 || instanceCount == 0)
return;
ENSURE(firstInstance == 0);
m_ShaderProgram->AssertPointersBound();
#if CONFIG2_GLES
// No instanced draw is issued in GLES builds; the capability is expected to be off.
ENSURE(!m_Device->GetCapabilities().instancing);
UNUSED2(firstVertex);
UNUSED2(vertexCount);
UNUSED2(instanceCount);
#else
glDrawArraysInstancedARB(GL_TRIANGLES, firstVertex, vertexCount, instanceCount);
#endif
ogl_WarnIfError();
}
// Draws instanceCount instances of indexCount 16-bit indices as triangle
// lists. Requires the instancing capability; only firstInstance == 0 and
// vertexOffset == 0 are supported. Does nothing when indexCount is zero.
void CDeviceCommandContext::DrawIndexedInstanced(
const uint32_t firstIndex, const uint32_t indexCount,
const uint32_t firstInstance, const uint32_t instanceCount,
const int32_t vertexOffset)
{
ENSURE(m_Device->GetCapabilities().instancing);
ENSURE(m_ShaderProgram);
ENSURE(m_InsidePass);
ENSURE(m_IndexBuffer || m_IndexBufferData);
if (indexCount == 0)
return;
ENSURE(firstInstance == 0 && vertexOffset == 0);
if (m_IndexBuffer)
{
// The requested range must lie inside the buffer object.
ENSURE(sizeof(uint16_t) * (firstIndex + indexCount) <= m_IndexBuffer->GetSize());
}
m_ShaderProgram->AssertPointersBound();
// Don't use glMultiDrawElements here since it doesn't have a significant
// performance impact and it suffers from various driver bugs (e.g. it breaks
// in Mesa 7.10 swrast with index VBOs).
#if CONFIG2_GLES
// No instanced draw is issued in GLES builds; the capability is expected to be off.
ENSURE(!m_Device->GetCapabilities().instancing);
UNUSED2(indexCount);
UNUSED2(firstIndex);
UNUSED2(instanceCount);
#else
// NOTE(review): static_cast appears to have lost its template arguments in this listing - confirm against the repository.
glDrawElementsInstancedARB(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT,
static_cast((static_cast(m_IndexBufferData) + sizeof(uint16_t) * firstIndex)),
instanceCount);
#endif
ogl_WarnIfError();
}
// Draws indexCount 16-bit indices, starting at firstIndex, as a triangle list.
//
// @param firstIndex Index (not byte offset) of the first index to read.
// @param indexCount Number of indices to draw; nothing happens when zero.
// @param start Range bound forwarded as `start` to glDrawRangeElements.
// @param end Range bound forwarded as `end` to glDrawRangeElements.
//
// Requires a bound shader program, an open pass and index data set through
// SetIndexBuffer or SetIndexBufferData. GLES builds ignore start/end.
void CDeviceCommandContext::DrawIndexedInRange(
	const uint32_t firstIndex, const uint32_t indexCount,
	const uint32_t start, const uint32_t end)
{
	ENSURE(m_ShaderProgram);
	ENSURE(m_InsidePass);
	if (indexCount == 0)
		return;
	ENSURE(m_IndexBuffer || m_IndexBufferData);
	if (m_IndexBuffer)
	{
		// Same bounds check as DrawIndexed and DrawIndexedInstanced: the
		// requested range must lie inside the buffer object.
		ENSURE(sizeof(uint16_t) * (firstIndex + indexCount) <= m_IndexBuffer->GetSize());
	}
	// m_IndexBufferData is null when an index buffer is set, in which case the
	// pointer is just the byte offset of firstIndex inside that buffer.
	const void* indices = static_cast<const void*>(
		static_cast<const uint8_t*>(m_IndexBufferData) + sizeof(uint16_t) * firstIndex);
	m_ShaderProgram->AssertPointersBound();
	// Draw with DrawRangeElements where available, since it might be more
	// efficient for slow hardware.
#if CONFIG2_GLES
	UNUSED2(start);
	UNUSED2(end);
	glDrawElements(GL_TRIANGLES, indexCount, GL_UNSIGNED_SHORT, indices);
#else
	glDrawRangeElementsEXT(GL_TRIANGLES, start, end, indexCount, GL_UNSIGNED_SHORT, indices);
#endif
	ogl_WarnIfError();
}
+void CDeviceCommandContext::BeginComputePass() // Opens a compute pass, in which Dispatch is allowed.
+{
+ ENSURE(!m_InsideFramebufferPass); // compute work may not be recorded inside a framebuffer pass
+ ENSURE(!m_InsideComputePass); // compute passes do not nest
+ m_InsideComputePass = true;
+}
+
+void CDeviceCommandContext::EndComputePass() // Closes the compute pass opened by BeginComputePass.
+{
+ ENSURE(m_InsideComputePass);
+ m_InsideComputePass = false;
+}
+
+// Launches a compute dispatch of groupCountX * groupCountY * groupCountZ work
+// groups, followed by a memory barrier covering image access, texture fetch,
+// texture update and framebuffer access. Only valid inside a compute pass.
+void CDeviceCommandContext::Dispatch(
+ const uint32_t groupCountX,
+ const uint32_t groupCountY,
+ const uint32_t groupCountZ)
+{
+ ENSURE(m_InsideComputePass);
+ glDispatchCompute(groupCountX, groupCountY, groupCountZ);
+ // TODO: we might want to do binding tracking to avoid redundant barriers.
+ glMemoryBarrier(
+ GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
+ // Report GL errors at the call that caused them, like the draw commands do.
+ ogl_WarnIfError();
+}
+
// Binds a sampled texture to the texture unit the current shader program
// associates with bindingSlot. Returns silently when the program reports no
// sampler type for that slot; logs an error and returns when the sampler type
// or the texture format does not fit, or when the unit is out of range.
void CDeviceCommandContext::SetTexture(const int32_t bindingSlot, ITexture* texture)
{
ENSURE(m_ShaderProgram);
ENSURE(texture);
ENSURE(texture->GetUsage() & Renderer::Backend::ITexture::Usage::SAMPLED);
const CShaderProgram::TextureUnit textureUnit =
m_ShaderProgram->GetTextureUnit(bindingSlot);
if (!textureUnit.type)
return;
// Only 2D, 2D-shadow (non-GLES) and cube samplers are accepted.
if (textureUnit.type != GL_SAMPLER_2D &&
#if !CONFIG2_GLES
textureUnit.type != GL_SAMPLER_2D_SHADOW &&
#endif
textureUnit.type != GL_SAMPLER_CUBE)
{
LOGERROR("CDeviceCommandContext::SetTexture: expected sampler at binding slot");
return;
}
#if !CONFIG2_GLES
// Shadow samplers can only sample depth textures.
if (textureUnit.type == GL_SAMPLER_2D_SHADOW)
{
if (!IsDepthFormat(texture->GetFormat()))
{
LOGERROR("CDeviceCommandContext::SetTexture: Invalid texture type (expected depth texture)");
return;
}
}
#endif
ENSURE(textureUnit.unit >= 0);
const uint32_t unit = textureUnit.unit;
if (unit >= m_BoundTextures.size())
{
LOGERROR("CDeviceCommandContext::SetTexture: Invalid texture unit (too big)");
return;
}
// NOTE(review): As() appears to have lost its template argument in this listing - confirm against the repository.
BindTexture(unit, textureUnit.target, texture->As()->GetHandle());
}
+void CDeviceCommandContext::SetStorageTexture(const int32_t bindingSlot, ITexture* texture) // Binds level 0 of a STORAGE texture as a read-write RGBA8 image for the current shader program.
+{
+ ENSURE(m_ShaderProgram);
+ ENSURE(texture);
+ ENSURE(texture->GetUsage() & Renderer::Backend::ITexture::Usage::STORAGE);
+
+ const CShaderProgram::TextureUnit textureUnit =
+ m_ShaderProgram->GetTextureUnit(bindingSlot);
+ if (!textureUnit.type) // the program reports no type for this slot: nothing to bind
+ return;
+ ENSURE(textureUnit.type == GL_IMAGE_2D);
+ ENSURE(texture->GetFormat() == Format::R8G8B8A8_UNORM); // the image format below is hard-coded to GL_RGBA8
+ glBindImageTexture(textureUnit.unit, texture->As()->GetHandle(), 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8); // NOTE(review): As() appears to have lost its template argument in this listing - confirm
+}
+
// Forwards a single float uniform for bindingSlot to the bound shader program.
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot,
const float value)
{
ENSURE(m_ShaderProgram);
m_ShaderProgram->SetUniform(bindingSlot, value);
}
// Forwards a two-component float uniform for bindingSlot to the bound shader program.
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY)
{
ENSURE(m_ShaderProgram);
m_ShaderProgram->SetUniform(bindingSlot, valueX, valueY);
}
// Forwards a three-component float uniform for bindingSlot to the bound shader program.
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY,
const float valueZ)
{
ENSURE(m_ShaderProgram);
m_ShaderProgram->SetUniform(bindingSlot, valueX, valueY, valueZ);
}
// Forwards a four-component float uniform for bindingSlot to the bound shader program.
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY,
const float valueZ, const float valueW)
{
ENSURE(m_ShaderProgram);
m_ShaderProgram->SetUniform(bindingSlot, valueX, valueY, valueZ, valueW);
}
// Forwards a span of uniform values for bindingSlot to the bound shader program.
// NOTE(review): PS::span appears to have lost its template argument in this listing - confirm against the repository.
void CDeviceCommandContext::SetUniform(
const int32_t bindingSlot, PS::span values)
{
ENSURE(m_ShaderProgram);
m_ShaderProgram->SetUniform(bindingSlot, values);
}
// Remembers which texture unit is active and what is bound to it, then binds
// (target, handle) on the last texture unit of the context.
CDeviceCommandContext::ScopedBind::ScopedBind(
CDeviceCommandContext* deviceCommandContext,
const GLenum target, const GLuint handle)
: m_DeviceCommandContext(deviceCommandContext),
m_OldBindUnit(deviceCommandContext->m_BoundTextures[deviceCommandContext->m_ActiveTextureUnit]),
m_ActiveTextureUnit(deviceCommandContext->m_ActiveTextureUnit)
{
// The temporary binding always goes to the highest-numbered unit.
const uint32_t unit = m_DeviceCommandContext->m_BoundTextures.size() - 1;
m_DeviceCommandContext->BindTexture(unit, target, handle);
}
// Rebinds the remembered target/handle on the texture unit that was active
// when the scope was entered.
CDeviceCommandContext::ScopedBind::~ScopedBind()
{
m_DeviceCommandContext->BindTexture(
m_ActiveTextureUnit, m_OldBindUnit.target, m_OldBindUnit.handle);
}
// Temporarily binds `buffer` to the GL target matching its type, bypassing the
// context's bound-buffer cache (the cache entry is left untouched so the
// destructor can restore from it). No GL call is made when the cache says the
// buffer is already bound.
CDeviceCommandContext::ScopedBufferBind::ScopedBufferBind(
CDeviceCommandContext* deviceCommandContext, CBuffer* buffer)
: m_DeviceCommandContext(deviceCommandContext)
{
ENSURE(buffer);
// NOTE(review): static_cast appears to have lost its template argument in this listing - confirm against the repository.
m_CacheIndex = static_cast(buffer->GetType());
const GLenum target = BufferTypeToGLTarget(buffer->GetType());
const GLuint handle = buffer->GetHandle();
if (m_DeviceCommandContext->m_BoundBuffers[m_CacheIndex].first == target &&
m_DeviceCommandContext->m_BoundBuffers[m_CacheIndex].second == handle)
{
// Use an invalid index as a sign that we don't need to restore the
// bound buffer.
m_CacheIndex = m_DeviceCommandContext->m_BoundBuffers.size();
}
else
{
glBindBufferARB(target, handle);
}
}
// Restores the binding recorded in the context's bound-buffer cache, unless
// the constructor found the buffer already bound (out-of-range index).
CDeviceCommandContext::ScopedBufferBind::~ScopedBufferBind()
{
if (m_CacheIndex >= m_DeviceCommandContext->m_BoundBuffers.size())
return;
glBindBufferARB(
m_DeviceCommandContext->m_BoundBuffers[m_CacheIndex].first,
m_DeviceCommandContext->m_BoundBuffers[m_CacheIndex].second);
}
} // namespace GL
} // namespace Backend
} // namespace Renderer
Index: ps/trunk/source/graphics/ShaderTechnique.cpp
===================================================================
--- ps/trunk/source/graphics/ShaderTechnique.cpp (revision 28009)
+++ ps/trunk/source/graphics/ShaderTechnique.cpp (revision 28010)
@@ -1,71 +1,97 @@
-/* Copyright (C) 2022 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#include "ShaderTechnique.h"
#include "graphics/ShaderProgram.h"
#include "renderer/backend/IDevice.h"
+#include
+
// Builds a pass from a pipeline state (ownership is taken) and the shader it
// uses. The shader must not be null.
// NOTE(review): std::unique_ptr appears to have lost its template argument in this listing - confirm against the repository.
CShaderPass::CShaderPass(
std::unique_ptr pipelineState,
const CShaderProgramPtr& shader)
: m_Shader(shader), m_PipelineState(std::move(pipelineState))
{
ENSURE(shader);
}
// Stores the path, defines and pipeline-state callback of the technique.
// Passes or a compute pipeline state are attached afterwards.
CShaderTechnique::CShaderTechnique(
const VfsPath& path, const CShaderDefines& defines,
const PipelineStateDescCallback& callback)
: m_Path(path), m_Defines(defines), m_PipelineStateDescCallback(callback)
{
}
// Takes ownership of the graphics passes of the technique. A technique that
// already has a compute pipeline state cannot receive graphics passes.
// NOTE(review): std::vector appears to have lost its template argument in this listing - confirm against the repository.
void CShaderTechnique::SetPasses(std::vector&& passes)
{
+ ENSURE(!m_ComputePipelineState);
m_Passes = std::move(passes);
}
+void CShaderTechnique::SetComputePipelineState( // Turns the technique into a compute technique: stores the pipeline state (ownership taken) and its shader.
+ std::unique_ptr pipelineState, // NOTE(review): template argument appears stripped in this listing - confirm
+ const CShaderProgramPtr& computeShader)
+{
+ ENSURE(m_Passes.empty()); // a technique cannot have both graphics passes and a compute state
+ m_ComputePipelineState = std::move(pipelineState);
+ m_ComputeShader = computeShader;
+}
+
// Returns the number of graphics passes. A technique configured through
// SetComputePipelineState has no graphics passes, so this returns 0 for it.
int CShaderTechnique::GetNumPasses() const
{
return m_Passes.size();
}
// Returns the backend shader program of the technique. For a compute technique
// only pass 0 is valid and the compute shader is returned; otherwise `pass`
// must index an existing graphics pass.
Renderer::Backend::IShaderProgram* CShaderTechnique::GetShader(int pass) const
{
- ENSURE(0 <= pass && pass < (int)m_Passes.size());
- return m_Passes[pass].GetPipelineState()->GetShaderProgram();
+ if (m_ComputeShader)
+ {
+ ENSURE(pass == 0);
+ return m_ComputeShader->GetBackendShaderProgram();
+ }
+ else
+ {
+ ENSURE(0 <= pass && pass < (int)m_Passes.size());
+ return m_Passes[pass].GetPipelineState()->GetShaderProgram();
+ }
}
// Returns the graphics pipeline state of the given pass; `pass` must index an
// existing graphics pass.
// NOTE(review): static_cast appears to have lost its template argument in this listing - confirm against the repository.
Renderer::Backend::IGraphicsPipelineState*
CShaderTechnique::GetGraphicsPipelineState(int pass) const
{
ENSURE(0 <= pass && pass < static_cast(m_Passes.size()));
return m_Passes[pass].GetPipelineState();
}
+Renderer::Backend::IComputePipelineState* // Non-owning pointer; null when no compute pipeline state was set.
+CShaderTechnique::GetComputePipelineState() const
+{
+ return m_ComputePipelineState.get();
+}
+
// Returns the sort-by-distance flag of the technique.
bool CShaderTechnique::GetSortByDistance() const
{
return m_SortByDistance;
}
// Sets the sort-by-distance flag of the technique.
void CShaderTechnique::SetSortByDistance(bool enable)
{
m_SortByDistance = enable;
}
Index: ps/trunk/source/ps/CStrInternStatic.h
===================================================================
--- ps/trunk/source/ps/CStrInternStatic.h (revision 28009)
+++ ps/trunk/source/ps/CStrInternStatic.h (revision 28010)
@@ -1,196 +1,202 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
// This file defines global CStrIntern variables, to avoid the cost of
// constructing CStrInterns frequently at runtime.
//
// A line like
// X(foo)
// defines a variable str_foo with value "foo".
//
// A line like
// X2(foo_0, "foo[0]")
// defines a variable str_foo_0 with value "foo[0]".
// For direct inclusion, we presumably just want the extern definitions.
// When the includer has not defined X, fall back to declarations only: every
// entry below then expands to an extern CStrIntern variable.
#ifndef X
#include "CStrIntern.h"
#define X(id) extern CStrIntern str_##id;
#define X2(id, str) extern CStrIntern str_##id;
#endif
// The list of interned strings. X(id) is used when the variable suffix and the
// string value are the same; X2(id, str) when the value is not a valid identifier.
X(0)
X(1)
X(2)
X(3)
X(4)
X(ALPHABLEND_PASS_BLEND)
X(ALPHABLEND_PASS_OPAQUE)
X(BLEND)
X(BLOOM_NOP)
X(BLOOM_PASS_H)
X(BLOOM_PASS_V)
X(DECAL)
X(DISABLE_RECEIVE_SHADOWS)
X(IGNORE_LOS)
X(MINIMAP_BASE)
X(MINIMAP_POINT)
X(MODE_SHADOWCAST)
X(MODE_SILHOUETTEDISPLAY)
X(MODE_SILHOUETTEOCCLUDER)
X(MODE_WIREFRAME)
X(MODE_WIREFRAME_SOLID)
X(PASS_REFLECTIONS)
X(PASS_REFRACTIONS)
X(PASS_SHADOWS)
X(RENDER_DEBUG_MODE)
X(RENDER_DEBUG_MODE_AO)
X(RENDER_DEBUG_MODE_ALPHA)
X(RENDER_DEBUG_MODE_CUSTOM)
X(RENDER_DEBUG_MODE_NONE)
X(SHADOWS_CASCADE_COUNT)
X(USE_DESCRIPTOR_INDEXING)
X(USE_FANCY_EFFECTS)
X(USE_FP_SHADOW)
X(USE_GPU_INSTANCING)
X(USE_GPU_SKINNING)
X(USE_INSTANCING)
X(USE_NORMALS)
X(USE_OBJECTCOLOR)
X(USE_REAL_DEPTH)
X(USE_REFLECTION)
X(USE_REFRACTION)
X(USE_SHADOW)
X(USE_SHADOW_PCF)
X(USE_SHADOW_SAMPLER)
X(USE_FOG)
X(WATERTYPE_CLAP)
X(WATERTYPE_LAKE)
X2(_emptystring, "")
X(a_apexPosition)
X(a_otherPosition)
X(a_retreatPosition)
X(a_skinJoints)
X(a_skinWeights)
X(a_splashPosition)
X(a_tangent)
X(a_waterInfo)
X(ambient)
X(baseTex)
X(blendTex)
X(bloom)
X(blurTex2)
X(blurTex4)
X(blurTex8)
X(brightness)
X(cameraForward)
X(cameraPos)
X(canvas2d)
X(color)
X(colorAdd)
X(colorMul)
+X(compute_rcas)
+X(compute_upscale_fsr)
X(debug_line)
X(debug_overlay)
X(delta)
X(depthTex)
X(dummy)
X(foamTex)
X(fogColor)
X(fogParams)
X(foreground_overlay)
X(fxaa)
X(grayscaleFactor)
X(hdr)
X(height)
X(instancingTransform)
+X(inTex)
X(losTex)
X(losTex1)
X(losTex2)
X(losTransform)
X(los_interp)
X(mapSize)
X(maskTex)
X(maskTextureTransform)
X(minimap)
X(minimap_los)
X(modelViewMatrix)
X(murkiness)
X(normalMap)
X(normalMap2)
X(objectColor)
X(overlay_line)
X(overlay_solid)
+X(outTex)
X(particle_add)
X(particle_multiply)
X(particle_overlay)
X(particle_solid)
X(particle_subtract)
X(playerColor)
X(projInvTransform)
X(qualityLevel)
X(reflectionMap)
X(reflectionMatrix)
X(refractionMap)
X(refractionMatrix)
X(renderedTex)
X(repeatScale)
X2(sans_10, "sans-10");
X(saturation)
X(screenSize)
X(shadingColor)
X(shadowDistance)
X(shadowDistances)
X(shadowScale)
X(shadowTex)
X(shadowTransform)
X(shadowTransforms)
X(sharpness)
X(skinBlendMatrices)
X(skyBoxRot)
X(skyCube)
X(sky_simple)
X(solid)
X(sunColor)
X(sunDir)
X(terrain_base)
X(terrain_blend)
X(terrain_decal)
X(terrain_solid)
X(tex)
X(texSize)
X(textureTransform)
X(time)
X(tint)
X(transform)
X(translation)
+X(upscale_bilinear)
+X(upscale_nearest)
X(viewInvTransform)
X(water_high)
X(water_simple)
X(water_waves)
X(waterEffectsTex)
X(waterTex)
X(waveTex)
X(waviness)
X(waveParams1)
X(waveParams2)
X(width)
X(windAngle)
X(zFar)
X(zNear)
// Undefine the macros so the file can be included again with new definitions.
#undef X
#undef X2
Index: ps/trunk/source/renderer/PostprocManager.h
===================================================================
--- ps/trunk/source/renderer/PostprocManager.h (revision 28009)
+++ ps/trunk/source/renderer/PostprocManager.h (revision 28010)
@@ -1,189 +1,231 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_POSTPROCMANAGER
#define INCLUDED_POSTPROCMANAGER
#include "graphics/ShaderTechniquePtr.h"
#include "ps/CStr.h"
#include "renderer/backend/IFramebuffer.h"
#include "renderer/backend/IDeviceCommandContext.h"
#include "renderer/backend/IShaderProgram.h"
#include "renderer/backend/ITexture.h"
#include
#include
// Declares the post-processing stage of the renderer: it holds the ping-pong
// framebuffers, blur chain, multisample buffers and the shader techniques
// (effect, sharpening, anti-aliasing, up/downscale) listed below, and exposes
// the entry points used to apply them to a rendered frame.
class CPostprocManager
{
public:
CPostprocManager(Renderer::Backend::IDevice* device);
~CPostprocManager();
// Returns true if the manager can be used.
bool IsEnabled() const;
// Create all buffers/textures in GPU memory and set default effect.
// @note Must be called before using in the renderer. May be called multiple times.
void Initialize();
// Update the size of the screen.
// Also invoked from the "renderer.scale" config hook in RenderingOptions.cpp.
void Resize();
// Returns a list of xml files found in shaders/effects/postproc.
static std::vector GetPostEffects();
// Returns the name of the current effect.
const CStrW& GetPostEffect() const
{
return m_PostProcEffect;
}
// Sets the current effect.
void SetPostEffect(const CStrW& name);
- // Triggers update of shaders and FBO if needed.
+ // Triggers update of shaders and framebuffers if needed.
void UpdateAntiAliasingTechnique();
void UpdateSharpeningTechnique();
void UpdateSharpnessFactor();
// Selects the upscale technique by name; called from the
// "renderer.upscale.technique" config hook in RenderingOptions.cpp.
+ void SetUpscaleTechnique(const CStr& upscaleName);
void SetDepthBufferClipPlanes(float nearPlane, float farPlane);
// @note CPostprocManager must be initialized first
Renderer::Backend::IFramebuffer* PrepareAndGetOutputFramebuffer();
// First renders blur textures, then calls ApplyEffect for each effect pass,
// ping-ponging the buffers at each step.
// @note CPostprocManager must be initialized first
void ApplyPostproc(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
// Blits the final postprocessed texture to the system framebuffer. The system
// framebuffer is selected as the output buffer. Should be called before
// silhouette rendering.
// @note CPostprocManager must be initialized first
void BlitOutputFramebuffer(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
Renderer::Backend::IFramebuffer* destination);
// Returns true if we render main scene in the MSAA framebuffer.
bool IsMultisampleEnabled() const;
// Resolves the MSAA framebuffer into the regular one.
void ResolveMultisampleFramebuffer(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
private:
void CreateMultisampleBuffer();
void DestroyMultisampleBuffer();
// NOTE(review): presumably derives the scaled m_Width/m_Height from the given
// unscaled size and m_Scale - confirm against PostprocManager.cpp.
+ void RecalculateSize(const uint32_t width, const uint32_t height);
+
+ bool ShouldUpscale() const;
+ bool ShouldDownscale() const;
+
// Two upscale paths: one writes into a destination texture (compute), the
// other renders into a destination framebuffer (fullscreen quad).
+ void UpscaleTextureByCompute(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::ITexture* destination);
+ void UpscaleTextureByFullscreenQuad(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::IFramebuffer* destination);
+
+ void ApplySharpnessAfterScale(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::ITexture* destination);
+
+ void DownscaleTextureByCompute(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::ITexture* destination);
+
Renderer::Backend::IDevice* m_Device = nullptr;
std::unique_ptr m_CaptureFramebuffer;
// Two framebuffers, that we flip between at each shader pass.
std::unique_ptr
m_PingFramebuffer, m_PongFramebuffer;
// Unique color textures for the framebuffers.
std::unique_ptr m_ColorTex1, m_ColorTex2;
// Textures and framebuffers at the unscaled size (see m_UnscaledWidth and
// m_UnscaledHeight below), added together with the render scale support.
+ std::unique_ptr
+ m_UnscaledTexture1, m_UnscaledTexture2;
+ std::unique_ptr
+ m_UnscaledFramebuffer1, m_UnscaledFramebuffer2;
// Render scale factor; 1.0 by default.
+ float m_Scale = 1.0f;
+
// The framebuffers share a depth/stencil texture.
std::unique_ptr m_DepthTex;
// NOTE(review): no in-class initializers here - presumably set by the
// constructor or SetDepthBufferClipPlanes; confirm.
float m_NearPlane, m_FarPlane;
// A framebuffer and textures x2 for each blur level we render.
struct BlurScale
{
struct Step
{
std::unique_ptr framebuffer;
std::unique_ptr texture;
};
std::array steps;
};
std::array m_BlurScales;
// Indicates which of the ping-pong buffers is used for reading and which for drawing.
bool m_WhichBuffer;
Renderer::Backend::IVertexInputLayout* m_VertexInputLayout = nullptr;
// The name and shader technique we are using. "default" name means no technique is used
// (i.e. while we do allocate the buffers, no effects are rendered).
CStrW m_PostProcEffect;
CShaderTechniquePtr m_PostProcTech;
CStr m_SharpName;
CShaderTechniquePtr m_SharpTech;
float m_Sharpness;
+ CShaderTechniquePtr m_UpscaleTech;
+ CShaderTechniquePtr m_UpscaleComputeTech;
+ CShaderTechniquePtr m_DownscaleComputeTech;
+ // Sharp technique only for FSR upscale.
+ CShaderTechniquePtr m_RCASComputeTech;
+
CStr m_AAName;
CShaderTechniquePtr m_AATech;
bool m_UsingMultisampleBuffer;
std::unique_ptr m_MultisampleFramebuffer;
std::unique_ptr
m_MultisampleColorTex, m_MultisampleDepthTex;
uint32_t m_MultisampleCount;
std::vector m_AllowedSampleCounts;
// The current screen dimensions in pixels.
// NOTE(review): with scaling there are two sizes; presumably m_Width/m_Height
// are the scaled render size and m_Unscaled* the window size - confirm.
- int m_Width, m_Height;
+ uint32_t m_Width, m_Height;
+ uint32_t m_UnscaledWidth, m_UnscaledHeight;
// Is the postproc manager initialized? Buffers created? Default effect loaded?
bool m_IsInitialized;
// Creates blur textures at various scales, for bloom, DOF, etc.
void ApplyBlur(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext);
// High quality GPU image scaling to half size. outTex must have exactly half the size
// of inTex. inWidth and inHeight are the dimensions of inTex in texels.
// NOTE(review): the signature has no outTex parameter; the output is the
// `framebuffer` argument. The sizes are still `int` although m_Width and
// m_Height are now uint32_t - check the call sites for sign conversions.
void ApplyBlurDownscale2x(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
Renderer::Backend::IFramebuffer* framebuffer,
Renderer::Backend::ITexture* inTex,
int inWidth, int inHeight);
// GPU-based Gaussian blur in two passes. inOutTex contains the input image and will be filled
// with the blurred image. tempTex must have the same size as inOutTex.
// inWidth and inHeight are the dimensions of the images in texels.
void ApplyBlurGauss(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
Renderer::Backend::ITexture* inTex,
Renderer::Backend::ITexture* tempTex,
Renderer::Backend::IFramebuffer* tempFramebuffer,
Renderer::Backend::IFramebuffer* outFramebuffer,
int inWidth, int inHeight);
// Applies a pass of a given effect to the entire current framebuffer. The shader is
// provided with a number of general-purpose variables, including the rendered screen so far,
// the depth buffer, a number of blur textures, the screen size, the zNear/zFar planes and
// some other parameters used by the optional bloom/HDR pass.
void ApplyEffect(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
const CShaderTechniquePtr& shaderTech, int pass);
// Delete all allocated buffers/textures from GPU memory.
void Cleanup();
// Delete existing buffers/textures and create them again, using a new screen size if needed.
// (the textures are also attached to the framebuffers)
void RecreateBuffers();
};
#endif // INCLUDED_POSTPROCMANAGER
Index: ps/trunk/source/renderer/RenderingOptions.cpp
===================================================================
--- ps/trunk/source/renderer/RenderingOptions.cpp (revision 28009)
+++ ps/trunk/source/renderer/RenderingOptions.cpp (revision 28010)
@@ -1,304 +1,318 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#include "RenderingOptions.h"
#include "graphics/TextureManager.h"
#include "ps/CLogger.h"
#include "ps/ConfigDB.h"
#include "ps/CStr.h"
#include "ps/CStrInternStatic.h"
#include "ps/VideoMode.h"
#include "renderer/backend/IDevice.h"
#include "renderer/Renderer.h"
#include "renderer/PostprocManager.h"
#include "renderer/SceneRenderer.h"
#include "renderer/ShadowMap.h"
#include "renderer/WaterManager.h"
CRenderingOptions g_RenderingOptions;
// Collects the objects returned by g_ConfigDB.RegisterHookAndCall so that all
// hooks registered by CRenderingOptions can be dropped together via clear().
class CRenderingOptions::ConfigHooks
{
public:
std::vector::iterator begin() { return hooks.begin(); }
std::vector::iterator end() { return hooks.end(); }
// Registers a hook for `name` whose callback re-reads the config value into
// `variable`. The lambda captures `variable` by reference, so the referenced
// object must stay alive for as long as the hook is registered.
template
void Setup(CStr8 name, T& variable)
{
hooks.emplace_back(g_ConfigDB.RegisterHookAndCall(name, [name, &variable]() { CFG_GET_VAL(name, variable); }));
}
// Registers an arbitrary callback as the hook for `name`.
// NOTE(review): RegisterHookAndCall presumably also runs the callback once
// immediately, as its name suggests - confirm in ConfigDB.
void Setup(CStr8 name, std::function hook)
{
hooks.emplace_back(g_ConfigDB.RegisterHookAndCall(name, hook));
}
// Drops every stored hook object.
void clear() { hooks.clear(); }
private:
std::vector hooks;
};
// Parses a render path name ("default", "fixed" or "shader").
// Any other spelling is reported with a warning and treated as the default path.
RenderPath RenderPathEnum::FromString(const CStr8& name)
{
	if (name == "fixed")
		return FIXED;
	if (name == "shader")
		return SHADER;

	// Only the literal "default" maps to DEFAULT silently; everything else
	// that reaches this point is an unknown name.
	if (!(name == "default"))
		LOGWARNING("Unknown render path %s", name.c_str());
	return DEFAULT;
}
// Returns the config spelling of a render path.
CStr8 RenderPathEnum::ToString(RenderPath path)
{
	if (path == RenderPath::FIXED)
		return "fixed";
	if (path == RenderPath::SHADER)
		return "shader";

	// RenderPath::DEFAULT and any value outside the enumeration share the
	// same name.
	return "default";
}
// Parses a render debug mode name by comparing it against the interned
// RENDER_DEBUG_MODE_* strings. Unknown names are reported with a warning and
// mapped to RenderDebugMode::NONE.
RenderDebugMode RenderDebugModeEnum::FromString(const CStr8& name)
{
	struct KnownMode
	{
		const char* label;
		RenderDebugMode mode;
	};
	// Checked in the listed order; the first matching label wins.
	const KnownMode knownModes[] =
	{
		{str_RENDER_DEBUG_MODE_NONE.c_str(), RenderDebugMode::NONE},
		{str_RENDER_DEBUG_MODE_AO.c_str(), RenderDebugMode::AO},
		{str_RENDER_DEBUG_MODE_ALPHA.c_str(), RenderDebugMode::ALPHA},
		{str_RENDER_DEBUG_MODE_CUSTOM.c_str(), RenderDebugMode::CUSTOM},
	};

	for (const KnownMode& known : knownModes)
	{
		if (name == known.label)
			return known.mode;
	}

	LOGWARNING("Unknown render debug mode %s", name.c_str());
	return RenderDebugMode::NONE;
}
// Returns the interned string naming a render debug mode.
CStrIntern RenderDebugModeEnum::ToString(RenderDebugMode mode)
{
	if (mode == RenderDebugMode::AO)
		return str_RENDER_DEBUG_MODE_AO;
	if (mode == RenderDebugMode::ALPHA)
		return str_RENDER_DEBUG_MODE_ALPHA;
	if (mode == RenderDebugMode::CUSTOM)
		return str_RENDER_DEBUG_MODE_CUSTOM;

	// RenderDebugMode::NONE and every other value use the NONE string.
	return str_RENDER_DEBUG_MODE_NONE;
}
// Starts with every optional feature switched off (actors stay on).
// Several of these flags are later overwritten from the configuration by
// ReadConfigAndSetupHooks().
CRenderingOptions::CRenderingOptions() : m_ConfigHooks(new ConfigHooks())
{
	m_RenderPath = RenderPath::DEFAULT;

	// Shadows.
	m_Shadows = false;
	m_ShadowAlphaFix = false;
	m_ShadowPCF = false;

	// Water.
	m_WaterEffects = false;
	m_WaterFancyEffects = false;
	m_WaterRealDepth = false;
	m_WaterRefraction = false;
	m_WaterReflection = false;

	// Scene effects.
	m_Particles = false;
	m_Silhouettes = false;
	m_Fog = false;
	m_GPUSkinning = false;
	m_SmoothLOS = false;
	m_PostProc = false;

	// Debug visualisations.
	m_DisplayFrustum = false;
	m_DisplayShadowsFrustum = false;

	// The only flag that defaults to enabled.
	m_RenderActors = true;
}
// Drops all registered config hooks before the object goes away; the hook
// lambdas capture `this` and references to members of this object.
CRenderingOptions::~CRenderingOptions()
{
ClearHooks();
}
// Registers one config hook per rendering-related setting. Each hook either
// copies the config value straight into a member (the two-argument Setup
// overload taking a variable) or runs a callback that reads the value and
// forwards it to the matching setter / renderer subsystem.
//
// Callbacks that touch g_Renderer are guarded by CRenderer::IsInitialised()
// because hooks may fire before the renderer exists.
//
// Every local that receives a config value is initialized before
// CFG_GET_VAL, so a lookup that leaves it untouched (e.g. a missing key)
// yields "disabled" instead of reading an indeterminate value.
void CRenderingOptions::ReadConfigAndSetupHooks()
{
m_ConfigHooks->Setup("renderpath", [this]() {
CStr renderPath;
CFG_GET_VAL("renderpath", renderPath);
SetRenderPath(RenderPathEnum::FromString(renderPath));
});
// The shadow settings below only need the shadow map texture rebuilt; the
// cascade count and cover map additionally mark the shaders dirty.
m_ConfigHooks->Setup("shadowquality", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetSceneRenderer().GetShadowMap().RecreateTexture();
});
m_ConfigHooks->Setup("shadowscascadecount", []() {
if (CRenderer::IsInitialised())
{
g_Renderer.GetSceneRenderer().GetShadowMap().RecreateTexture();
g_Renderer.MakeShadersDirty();
}
});
m_ConfigHooks->Setup("shadowscovermap", []() {
if (CRenderer::IsInitialised())
{
g_Renderer.GetSceneRenderer().GetShadowMap().RecreateTexture();
g_Renderer.MakeShadersDirty();
}
});
m_ConfigHooks->Setup("shadowscutoffdistance", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetSceneRenderer().GetShadowMap().RecreateTexture();
});
m_ConfigHooks->Setup("shadows", [this]() {
bool enabled = false;
CFG_GET_VAL("shadows", enabled);
SetShadows(enabled);
});
m_ConfigHooks->Setup("shadowpcf", [this]() {
bool enabled = false;
CFG_GET_VAL("shadowpcf", enabled);
SetShadowPCF(enabled);
});
m_ConfigHooks->Setup("postproc", m_PostProc);
m_ConfigHooks->Setup("antialiasing", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetPostprocManager().UpdateAntiAliasingTechnique();
});
m_ConfigHooks->Setup("sharpness", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetPostprocManager().UpdateSharpnessFactor();
});
m_ConfigHooks->Setup("sharpening", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetPostprocManager().UpdateSharpeningTechnique();
});
// A changed render scale is applied by asking the postproc manager to resize.
+ m_ConfigHooks->Setup("renderer.scale", []()
+ {
+ if (CRenderer::IsInitialised())
+ g_Renderer.GetPostprocManager().Resize();
+ });
+
+ m_ConfigHooks->Setup("renderer.upscale.technique", []()
+ {
+ CStr upscaleName;
+ CFG_GET_VAL("renderer.upscale.technique", upscaleName);
+ if (CRenderer::IsInitialised())
+ g_Renderer.GetPostprocManager().SetUpscaleTechnique(upscaleName);
+ });
+
m_ConfigHooks->Setup("smoothlos", m_SmoothLOS);
// The water toggles update the flag first and then let the water manager
// recreate or load whatever textures the new combination needs.
m_ConfigHooks->Setup("watereffects", [this]() {
bool enabled = false;
CFG_GET_VAL("watereffects", enabled);
SetWaterEffects(enabled);
if (CRenderer::IsInitialised())
g_Renderer.GetSceneRenderer().GetWaterManager().RecreateOrLoadTexturesIfNeeded();
});
m_ConfigHooks->Setup("waterfancyeffects", [this]() {
bool enabled = false;
CFG_GET_VAL("waterfancyeffects", enabled);
SetWaterFancyEffects(enabled);
if (CRenderer::IsInitialised())
g_Renderer.GetSceneRenderer().GetWaterManager().RecreateOrLoadTexturesIfNeeded();
});
m_ConfigHooks->Setup("waterrealdepth", m_WaterRealDepth);
m_ConfigHooks->Setup("waterrefraction", [this]() {
bool enabled = false;
CFG_GET_VAL("waterrefraction", enabled);
SetWaterRefraction(enabled);
if (CRenderer::IsInitialised())
g_Renderer.GetSceneRenderer().GetWaterManager().RecreateOrLoadTexturesIfNeeded();
});
m_ConfigHooks->Setup("waterreflection", [this]() {
bool enabled = false;
CFG_GET_VAL("waterreflection", enabled);
SetWaterReflection(enabled);
if (CRenderer::IsInitialised())
g_Renderer.GetSceneRenderer().GetWaterManager().RecreateOrLoadTexturesIfNeeded();
});
m_ConfigHooks->Setup("particles", m_Particles);
m_ConfigHooks->Setup("fog", [this]() {
bool enabled = false;
CFG_GET_VAL("fog", enabled);
SetFog(enabled);
});
m_ConfigHooks->Setup("silhouettes", m_Silhouettes);
// GPU skinning is only switched on for backends other than GL_ARB and Vulkan.
// NOTE(review): this hook never sets m_GPUSkinning back to false, so turning
// the option off at runtime has no effect here - presumably intentional
// (restart required); confirm before changing.
m_ConfigHooks->Setup("gpuskinning", [this]() {
bool enabled = false;
CFG_GET_VAL("gpuskinning", enabled);
if (enabled)
{
if (g_VideoMode.GetBackendDevice()->GetBackend() == Renderer::Backend::Backend::GL_ARB)
LOGWARNING("GPUSkinning has been disabled, because it is not supported with ARB shaders.");
else if (g_VideoMode.GetBackendDevice()->GetBackend() == Renderer::Backend::Backend::VULKAN)
LOGWARNING("GPUSkinning has been disabled, because it is not supported for Vulkan backend yet.");
else
m_GPUSkinning = true;
}
});
m_ConfigHooks->Setup("renderactors", m_RenderActors);
// Both texture settings are handled by the same texture manager notification.
m_ConfigHooks->Setup("textures.quality", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetTextureManager().OnQualityChanged();
});
m_ConfigHooks->Setup("textures.maxanisotropy", []() {
if (CRenderer::IsInitialised())
g_Renderer.GetTextureManager().OnQualityChanged();
});
}
// Drops every config hook registered through m_ConfigHooks.
void CRenderingOptions::ClearHooks()
{
m_ConfigHooks->clear();
}
// Stores the shadows flag and, once the renderer exists, marks its shaders dirty.
void CRenderingOptions::SetShadows(bool value)
{
	m_Shadows = value;

	// Nothing to notify before the renderer has been created.
	if (!CRenderer::IsInitialised())
		return;
	g_Renderer.MakeShadersDirty();
}
// Stores the shadow PCF flag and, once the renderer exists, marks its shaders dirty.
void CRenderingOptions::SetShadowPCF(bool value)
{
	m_ShadowPCF = value;

	// Nothing to notify before the renderer has been created.
	if (!CRenderer::IsInitialised())
		return;
	g_Renderer.MakeShadersDirty();
}
// Stores the fog flag and, once the renderer exists, marks its shaders dirty.
void CRenderingOptions::SetFog(bool value)
{
	m_Fog = value;

	// Nothing to notify before the renderer has been created.
	if (!CRenderer::IsInitialised())
		return;
	g_Renderer.MakeShadersDirty();
}
// Stores the render path and forwards it to the renderer once it exists.
void CRenderingOptions::SetRenderPath(RenderPath value)
{
	m_RenderPath = value;

	// Before the renderer is created only the stored value is updated.
	if (!CRenderer::IsInitialised())
		return;
	g_Renderer.SetRenderPath(m_RenderPath);
}
// Stores the render debug mode and, once the renderer exists, marks its shaders dirty.
void CRenderingOptions::SetRenderDebugMode(RenderDebugMode value)
{
	m_RenderDebugMode = value;

	// Nothing to notify before the renderer has been created.
	if (!CRenderer::IsInitialised())
		return;
	g_Renderer.MakeShadersDirty();
}
Index: ps/trunk/source/renderer/backend/ITexture.h
===================================================================
--- ps/trunk/source/renderer/backend/ITexture.h (revision 28009)
+++ ps/trunk/source/renderer/backend/ITexture.h (revision 28010)
@@ -1,67 +1,68 @@
-/* Copyright (C) 2022 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_RENDERER_BACKEND_ITEXTURE
#define INCLUDED_RENDERER_BACKEND_ITEXTURE
#include "renderer/backend/Format.h"
#include "renderer/backend/IDeviceObject.h"
#include "renderer/backend/Sampler.h"
#include
namespace Renderer
{
namespace Backend
{
// Backend-independent interface of a texture object. It only exposes
// read-only properties; creation goes through the device (see the
// CreateTexture / CreateTexture2D declarations of the device classes).
class ITexture : public IDeviceObject
{
public:
enum class Type
{
TEXTURE_2D,
TEXTURE_2D_MULTISAMPLE,
TEXTURE_CUBE
};
// Using a struct instead of an enum allows using the same syntax while
// avoiding adding operator overrides and additional checks on casts.
// Each constant is a distinct bit, so values can be combined with `|` into
// the uint32_t returned by GetUsage().
struct Usage
{
static constexpr uint32_t TRANSFER_SRC = 1u << 0u;
static constexpr uint32_t TRANSFER_DST = 1u << 1u;
static constexpr uint32_t SAMPLED = 1u << 2u;
static constexpr uint32_t COLOR_ATTACHMENT = 1u << 3u;
static constexpr uint32_t DEPTH_STENCIL_ATTACHMENT = 1u << 4u;
// NOTE(review): added in the same revision as compute pipeline support;
// presumably marks textures that shaders may write as storage images - confirm.
+ static constexpr uint32_t STORAGE = 1u << 5u;
};
virtual Type GetType() const = 0;
// Bitwise combination of Usage constants.
virtual uint32_t GetUsage() const = 0;
virtual Format GetFormat() const = 0;
virtual uint32_t GetWidth() const = 0;
virtual uint32_t GetHeight() const = 0;
virtual uint32_t GetMIPLevelCount() const = 0;
};
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_ITEXTURE
Index: ps/trunk/source/renderer/backend/dummy/Device.h
===================================================================
--- ps/trunk/source/renderer/backend/dummy/Device.h (revision 28009)
+++ ps/trunk/source/renderer/backend/dummy/Device.h (revision 28010)
@@ -1,121 +1,124 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_RENDERER_BACKEND_DUMMY_DEVICE
#define INCLUDED_RENDERER_BACKEND_DUMMY_DEVICE
#include "renderer/backend/dummy/DeviceForward.h"
#include "renderer/backend/IDevice.h"
#include
#include
#include
class CShaderDefines;
namespace Renderer
{
namespace Backend
{
namespace Dummy
{
class CDeviceCommandContext;
// IDevice implementation of the dummy backend (GetBackend() reports
// Backend::DUMMY). It overrides the whole IDevice interface; the
// implementations live in the corresponding .cpp file.
class CDevice : public IDevice
{
public:
CDevice();
~CDevice() override;
Backend GetBackend() const override { return Backend::DUMMY; }
const std::string& GetName() const override { return m_Name; }
const std::string& GetVersion() const override { return m_Version; }
const std::string& GetDriverInformation() const override { return m_DriverInformation; }
const std::vector& GetExtensions() const override { return m_Extensions; }
void Report(const ScriptRequest& rq, JS::HandleValue settings) override;
std::unique_ptr CreateCommandContext() override;
std::unique_ptr CreateGraphicsPipelineState(
const SGraphicsPipelineStateDesc& pipelineStateDesc) override;
// Compute counterpart of CreateGraphicsPipelineState.
+ std::unique_ptr CreateComputePipelineState(
+ const SComputePipelineStateDesc& pipelineStateDesc) override;
+
std::unique_ptr CreateVertexInputLayout(
const PS::span attributes) override;
std::unique_ptr CreateTexture(
const char* name, const ITexture::Type type, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount, const uint32_t sampleCount) override;
// NOTE(review): default arguments on a virtual function are chosen from the
// static type of the call expression; keep them in sync with IDevice.
std::unique_ptr CreateTexture2D(
const char* name, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount = 1, const uint32_t sampleCount = 1) override;
std::unique_ptr CreateFramebuffer(
const char* name, SColorAttachment* colorAttachment,
SDepthStencilAttachment* depthStencilAttachment) override;
std::unique_ptr CreateBuffer(
const char* name, const IBuffer::Type type, const uint32_t size, const bool dynamic) override;
std::unique_ptr CreateShaderProgram(
const CStr& name, const CShaderDefines& defines) override;
bool AcquireNextBackbuffer() override;
// The load/store operations are left unnamed in this declaration.
IFramebuffer* GetCurrentBackbuffer(
const AttachmentLoadOp, const AttachmentStoreOp,
const AttachmentLoadOp, const AttachmentStoreOp) override;
void Present() override;
void OnWindowResize(const uint32_t width, const uint32_t height) override;
bool IsTextureFormatSupported(const Format format) const override;
bool IsFramebufferFormatSupported(const Format format) const override;
Format GetPreferredDepthStencilFormat(
const uint32_t usage, const bool depth, const bool stencil) const override;
const Capabilities& GetCapabilities() const override { return m_Capabilities; }
protected:
// Backing storage for the string/extension getters above.
std::string m_Name;
std::string m_Version;
std::string m_DriverInformation;
std::vector m_Extensions;
std::unique_ptr m_Backbuffer;
// Value-initialized, so every capability starts out false/zero.
Capabilities m_Capabilities{};
};
} // namespace Dummy
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_DUMMY_DEVICE
Index: ps/trunk/source/renderer/backend/dummy/PipelineState.cpp
===================================================================
--- ps/trunk/source/renderer/backend/dummy/PipelineState.cpp (revision 28009)
+++ ps/trunk/source/renderer/backend/dummy/PipelineState.cpp (revision 28010)
@@ -1,52 +1,67 @@
-/* Copyright (C) 2022 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#include "precompiled.h"
#include "PipelineState.h"
#include "renderer/backend/dummy/Device.h"
namespace Renderer
{
namespace Backend
{
namespace Dummy
{
// static
// Builds a dummy graphics pipeline state: the object only remembers the
// creating device and keeps a copy of the description.
std::unique_ptr CGraphicsPipelineState::Create(
CDevice* device, const SGraphicsPipelineStateDesc& desc)
{
std::unique_ptr pipelineState{new CGraphicsPipelineState()};
pipelineState->m_Device = device;
pipelineState->m_Desc = desc;
return pipelineState;
}
// Returns the device that was passed to Create().
IDevice* CGraphicsPipelineState::GetDevice()
{
	return this->m_Device;
}
// Builds a dummy compute pipeline state, mirroring
// CGraphicsPipelineState::Create: the object only remembers the creating
// device and keeps a copy of the description.
+// static
+std::unique_ptr CComputePipelineState::Create(
+ CDevice* device, const SComputePipelineStateDesc& desc)
+{
+ std::unique_ptr pipelineState{new CComputePipelineState()};
+ pipelineState->m_Device = device;
+ pipelineState->m_Desc = desc;
+ return pipelineState;
+}
+
// Returns the device that was passed to Create().
+IDevice* CComputePipelineState::GetDevice()
+{
+ return m_Device;
+}
+
} // namespace Dummy
} // namespace Backend
} // namespace Renderer
Index: ps/trunk/source/renderer/backend/gl/Device.h
===================================================================
--- ps/trunk/source/renderer/backend/gl/Device.h (revision 28009)
+++ ps/trunk/source/renderer/backend/gl/Device.h (revision 28010)
@@ -1,165 +1,168 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_RENDERER_BACKEND_GL_DEVICE
#define INCLUDED_RENDERER_BACKEND_GL_DEVICE
#include "renderer/backend/Format.h"
#include "renderer/backend/gl/Buffer.h"
#include "renderer/backend/gl/DeviceForward.h"
#include "renderer/backend/gl/Framebuffer.h"
#include "renderer/backend/gl/ShaderProgram.h"
#include "renderer/backend/gl/Texture.h"
#include "renderer/backend/IDevice.h"
#include "scriptinterface/ScriptForward.h"
#include
#include
#include
#include
#include
typedef struct SDL_Window SDL_Window;
typedef void* SDL_GLContext;
namespace Renderer
{
namespace Backend
{
namespace GL
{
class CDeviceCommandContext;
// IDevice implementation of the OpenGL backend. One class serves both GL
// flavours: GetBackend() reports Backend::GL_ARB or Backend::GL depending on
// m_ARB. Instances are obtained through Create(); the constructor is private.
class CDevice final : public IDevice
{
public:
~CDevice() override;
/**
* Creates the GL device and the GL context for the window, if a window is
* present.
*/
static std::unique_ptr Create(SDL_Window* window, const bool arb);
Backend GetBackend() const override { return m_ARB ? Backend::GL_ARB : Backend::GL; }
const std::string& GetName() const override { return m_Name; }
const std::string& GetVersion() const override { return m_Version; }
const std::string& GetDriverInformation() const override { return m_DriverInformation; }
const std::vector& GetExtensions() const override { return m_Extensions; }
void Report(const ScriptRequest& rq, JS::HandleValue settings) override;
std::unique_ptr CreateCommandContext() override;
std::unique_ptr CreateGraphicsPipelineState(
const SGraphicsPipelineStateDesc& pipelineStateDesc) override;
// Compute counterpart of CreateGraphicsPipelineState.
+ std::unique_ptr CreateComputePipelineState(
+ const SComputePipelineStateDesc& pipelineStateDesc) override;
+
std::unique_ptr CreateVertexInputLayout(
const PS::span attributes) override;
// GL-specific helper (not part of IDevice): returns the currently active
// command context, or nullptr if there is none.
CDeviceCommandContext* GetActiveCommandContext() { return m_ActiveCommandContext; }
std::unique_ptr CreateTexture(
const char* name, const ITexture::Type type, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount, const uint32_t sampleCount) override;
// NOTE(review): default arguments on a virtual function are chosen from the
// static type of the call expression; keep them in sync with IDevice.
std::unique_ptr CreateTexture2D(
const char* name, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount = 1, const uint32_t sampleCount = 1) override;
std::unique_ptr CreateFramebuffer(
const char* name, SColorAttachment* colorAttachment,
SDepthStencilAttachment* depthStencilAttachment) override;
std::unique_ptr CreateBuffer(
const char* name, const IBuffer::Type type, const uint32_t size, const bool dynamic) override;
std::unique_ptr CreateShaderProgram(
const CStr& name, const CShaderDefines& defines) override;
bool AcquireNextBackbuffer() override;
IFramebuffer* GetCurrentBackbuffer(
const AttachmentLoadOp colorAttachmentLoadOp,
const AttachmentStoreOp colorAttachmentStoreOp,
const AttachmentLoadOp depthStencilAttachmentLoadOp,
const AttachmentStoreOp depthStencilAttachmentStoreOp) override;
void Present() override;
void OnWindowResize(const uint32_t width, const uint32_t height) override;
bool UseFramebufferInvalidating() const { return m_UseFramebufferInvalidating; }
bool IsTextureFormatSupported(const Format format) const override;
bool IsFramebufferFormatSupported(const Format format) const override;
Format GetPreferredDepthStencilFormat(
const uint32_t usage, const bool depth, const bool stencil) const override;
const Capabilities& GetCapabilities() const override { return m_Capabilities; }
private:
// Private: instances are created through the static Create() factory.
CDevice();
SDL_Window* m_Window = nullptr;
SDL_GLContext m_Context = nullptr;
int m_SurfaceDrawableWidth = 0, m_SurfaceDrawableHeight = 0;
// Selects which backend GetBackend() reports (GL_ARB when true).
bool m_ARB = false;
std::string m_Name;
std::string m_Version;
std::string m_DriverInformation;
std::vector m_Extensions;
// GL can have only one command context at a time.
// TODO: remove as soon as we have no GL code outside backend, currently
// it's used only as a helper for transition.
CDeviceCommandContext* m_ActiveCommandContext = nullptr;
// Backbuffers are keyed by the four load/store operations, in the same order
// as the parameters of GetCurrentBackbuffer().
using BackbufferKey = std::tuple<
AttachmentLoadOp, AttachmentStoreOp,
AttachmentLoadOp, AttachmentStoreOp>;
struct BackbufferKeyHash
{
size_t operator()(const BackbufferKey& key) const;
};
// We use std::unordered_map to avoid storing sizes of Attachment*Op
// enumerations. If it becomes a performance issue we'll replace it
// by an array.
std::unordered_map<
BackbufferKey, std::unique_ptr, BackbufferKeyHash> m_Backbuffers;
bool m_BackbufferAcquired = false;
bool m_UseFramebufferInvalidating = false;
// Value-initialized, so every capability starts out false/zero.
Capabilities m_Capabilities{};
};
} // namespace GL
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_GL_DEVICE
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_a.h
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_a.h (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_a.h (revision 28010)
@@ -0,0 +1,2656 @@
+//==============================================================================================================================
+//
+// [A] SHADER PORTABILITY 1.20210629
+//
+//==============================================================================================================================
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+// MIT LICENSE
+// ===========
+// Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS").
+// -----------
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+// -----------
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+// Software.
+// -----------
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+// ABOUT
+// =====
+// Common central point for high-level shading language and C portability for various shader headers.
+//------------------------------------------------------------------------------------------------------------------------------
+// DEFINES
+// =======
+// A_CPU ..... Include the CPU related code.
+// A_GPU ..... Include the GPU related code.
+// A_GLSL .... Using GLSL.
+// A_HLSL .... Using HLSL.
+// A_HLSL_6_2 Using HLSL 6.2 with new 'uint16_t' and related types (requires '-enable-16bit-types').
+// A_NO_16_BIT_CAST Don't use instructions that are not available in SPIR-V (needed for running A_HLSL_6_2 on Vulkan)
+// A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default).
+// =======
+// A_BYTE .... Support 8-bit integer.
+// A_HALF .... Support 16-bit integer and floating point.
+// A_LONG .... Support 64-bit integer.
+// A_DUBL .... Support 64-bit floating point.
+// =======
+// A_WAVE .... Support wave-wide operations.
+//------------------------------------------------------------------------------------------------------------------------------
+// To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'.
+//------------------------------------------------------------------------------------------------------------------------------
+// SIMPLIFIED TYPE SYSTEM
+// ======================
+// - All ints will be unsigned with exception of when signed is required.
+// - Type naming simplified and shortened "A<type><#components>",
+// - H = 16-bit float (half)
+// - F = 32-bit float (float)
+// - D = 64-bit float (double)
+// - P = 1-bit integer (predicate, not using bool because 'B' is used for byte)
+// - B = 8-bit integer (byte)
+// - W = 16-bit integer (word)
+// - U = 32-bit integer (unsigned)
+// - L = 64-bit integer (long)
+// - Using "AS<type><#components>" for signed when required.
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+// - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops).
+//------------------------------------------------------------------------------------------------------------------------------
+// CHANGE LOG
+// ==========
+// 20200914 - Expanded wave ops and prx code.
+// 20200713 - Added [ZOL] section, fixed serious bugs in sRGB and Rec.709 color conversion code, etc.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// COMMON
+//==============================================================================================================================
+// Two times pi as a literal. Defined in the COMMON section, ahead of the A_CPU guard.
+#define A_2PI 6.28318530718
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+// CPU
+//
+//
+//==============================================================================================================================
+#ifdef A_CPU
+ // The two qualifiers below are defaults only: a definition supplied before this point is left untouched.
+ // A_RESTRICT is the qualifier used by the pointer-based vector argument/return macros.
+ #if !defined(A_RESTRICT)
+ #define A_RESTRICT __restrict
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // A_STATIC is the prefix placed in front of the CPU-side function definitions.
+ #if !defined(A_STATIC)
+ #define A_STATIC static
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Same types across CPU and GPU.
+ // Predicate uses 32-bit integer (C friendly bool).
+ // Scalar aliases: the letter after 'A' names the element kind and the trailing '1' is the component count.
+ // P = predicate, F = 32-bit float, D = 64-bit float, B/W/U/L = 8/16/32/64-bit unsigned integer.
+ typedef uint32_t AP1;
+ typedef float AF1;
+ typedef double AD1;
+ typedef uint8_t AB1;
+ typedef uint16_t AW1;
+ typedef uint32_t AU1;
+ typedef uint64_t AL1;
+ // 'AS' prefix: signed counterparts of B/W/U/L.
+ typedef int8_t ASB1;
+ typedef int16_t ASW1;
+ typedef int32_t ASU1;
+ typedef int64_t ASL1;
+//------------------------------------------------------------------------------------------------------------------------------
+ // Value-cast helpers: each expands to a parenthesized C-style cast of its argument to the named scalar type.
+ #define AD1_(a) ((AD1)(a))
+ #define AF1_(a) ((AF1)(a))
+ #define AL1_(a) ((AL1)(a))
+ #define AU1_(a) ((AU1)(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Casts to the signed 64-bit (ASL1) and signed 32-bit (ASU1) integer types.
+ #define ASL1_(a) ((ASL1)(a))
+ #define ASU1_(a) ((ASU1)(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Returns the storage of an AF1 value viewed as an AU1: the float is written to one union member and the
+ // unsigned member sharing that storage is read back.
+ A_STATIC AU1 AU1_AF1(AF1 a)
+ {
+  union{AF1 asFloat;AU1 asUint;}pun;
+  pun.asFloat=a;
+  return pun.asUint;
+ }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Truth values expressed as the plain integers 1 and 0.
+ #define A_TRUE 1
+ #define A_FALSE 0
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// CPU/GPU PORTING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Get CPU and GPU to share all setup code, without duplicate code paths.
+// This uses a lower-case prefix for special vector constructs.
+// - In C restrict pointers are used.
+// - In the shading language, in/inout/out arguments are used.
+// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]).
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ // CPU side: every vector return/argument kind below expands to an A_RESTRICT-qualified pointer to the scalar
+ // element type (AD1, AF1, AL1 or AU1). The component count in the macro name does not change the expansion.
+ // ret*: return type of functions that hand back a vector.
+ #define retAD2 AD1 *A_RESTRICT
+ #define retAD3 AD1 *A_RESTRICT
+ #define retAD4 AD1 *A_RESTRICT
+ #define retAF2 AF1 *A_RESTRICT
+ #define retAF3 AF1 *A_RESTRICT
+ #define retAF4 AF1 *A_RESTRICT
+ #define retAL2 AL1 *A_RESTRICT
+ #define retAL3 AL1 *A_RESTRICT
+ #define retAL4 AL1 *A_RESTRICT
+ #define retAU2 AU1 *A_RESTRICT
+ #define retAU3 AU1 *A_RESTRICT
+ #define retAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ // in*: vector input arguments.
+ #define inAD2 AD1 *A_RESTRICT
+ #define inAD3 AD1 *A_RESTRICT
+ #define inAD4 AD1 *A_RESTRICT
+ #define inAF2 AF1 *A_RESTRICT
+ #define inAF3 AF1 *A_RESTRICT
+ #define inAF4 AF1 *A_RESTRICT
+ #define inAL2 AL1 *A_RESTRICT
+ #define inAL3 AL1 *A_RESTRICT
+ #define inAL4 AL1 *A_RESTRICT
+ #define inAU2 AU1 *A_RESTRICT
+ #define inAU3 AU1 *A_RESTRICT
+ #define inAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ // inout*: vector arguments that are both read and written.
+ #define inoutAD2 AD1 *A_RESTRICT
+ #define inoutAD3 AD1 *A_RESTRICT
+ #define inoutAD4 AD1 *A_RESTRICT
+ #define inoutAF2 AF1 *A_RESTRICT
+ #define inoutAF3 AF1 *A_RESTRICT
+ #define inoutAF4 AF1 *A_RESTRICT
+ #define inoutAL2 AL1 *A_RESTRICT
+ #define inoutAL3 AL1 *A_RESTRICT
+ #define inoutAL4 AL1 *A_RESTRICT
+ #define inoutAU2 AU1 *A_RESTRICT
+ #define inoutAU3 AU1 *A_RESTRICT
+ #define inoutAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ // out*: vector output arguments.
+ #define outAD2 AD1 *A_RESTRICT
+ #define outAD3 AD1 *A_RESTRICT
+ #define outAD4 AD1 *A_RESTRICT
+ #define outAF2 AF1 *A_RESTRICT
+ #define outAF3 AF1 *A_RESTRICT
+ #define outAF4 AF1 *A_RESTRICT
+ #define outAL2 AL1 *A_RESTRICT
+ #define outAL3 AL1 *A_RESTRICT
+ #define outAL4 AL1 *A_RESTRICT
+ #define outAU2 AU1 *A_RESTRICT
+ #define outAU3 AU1 *A_RESTRICT
+ #define outAU4 AU1 *A_RESTRICT
+//------------------------------------------------------------------------------------------------------------------------------
+ // var*(x): declares a vector variable named x as a fixed-size array (2, 3 or 4 elements) of the scalar type.
+ #define varAD2(x) AD1 x[2]
+ #define varAD3(x) AD1 x[3]
+ #define varAD4(x) AD1 x[4]
+ #define varAF2(x) AF1 x[2]
+ #define varAF3(x) AF1 x[3]
+ #define varAF4(x) AF1 x[4]
+ #define varAL2(x) AL1 x[2]
+ #define varAL3(x) AL1 x[3]
+ #define varAL4(x) AL1 x[4]
+ #define varAU2(x) AU1 x[2]
+ #define varAU3(x) AU1 x[3]
+ #define varAU4(x) AU1 x[4]
+//------------------------------------------------------------------------------------------------------------------------------
+ // init*(...): expands to a brace-enclosed initializer list of the given components. The expansion is the same
+ // for every element type; only the number of components differs.
+ #define initAD2(x,y) {x,y}
+ #define initAD3(x,y,z) {x,y,z}
+ #define initAD4(x,y,z,w) {x,y,z,w}
+ #define initAF2(x,y) {x,y}
+ #define initAF3(x,y,z) {x,y,z}
+ #define initAF4(x,y,z,w) {x,y,z,w}
+ #define initAL2(x,y) {x,y}
+ #define initAL3(x,y,z) {x,y,z}
+ #define initAL4(x,y,z,w) {x,y,z,w}
+ #define initAU2(x,y) {x,y}
+ #define initAU3(x,y,z) {x,y,z}
+ #define initAU4(x,y,z,w) {x,y,z,w}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// SCALAR RETURN OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+// - Replace transcendentals with manual versions.
+//==============================================================================================================================
+ // Absolute value. The SU1/SL1 variants take and return the unsigned carrier type (AU1/AL1) but compute the
+ // absolute value of the operand converted to the matching signed type.
+ // The non-GCC 64-bit variant calls llabs(): labs() takes a 'long', which is only 32 bits wide on LLP64 targets
+ // such as MSVC, so the previous cast to 'long' dropped the upper half of the operand there. This also matches
+ // the A_GCC branch, which already uses the 'long long' builtin.
+ #ifdef A_GCC
+ A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);}
+ A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);}
+ A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));}
+ A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));}
+ #else
+ A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);}
+ A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);}
+ A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));}
+ A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(llabs(ASL1_(a)));}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Cosine for AD1/AF1: the C math library functions by default, the compiler builtins when A_GCC is defined.
+ #ifndef A_GCC
+ A_STATIC AD1 ACosD1(AD1 a) { return cos(a); }
+ A_STATIC AF1 ACosF1(AF1 a) { return cosf(a); }
+ #else
+ A_STATIC AD1 ACosD1(AD1 a) { return __builtin_cos(a); }
+ A_STATIC AF1 ACosF1(AF1 a) { return __builtin_cosf(a); }
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Dot products over 2, 3 and 4 components. Products are summed left to right, starting from component 0.
+ A_STATIC AD1 ADotD2(inAD2 a, inAD2 b) { return (a[0]*b[0]) + (a[1]*b[1]); }
+ A_STATIC AD1 ADotD3(inAD3 a, inAD3 b) { return ((a[0]*b[0]) + (a[1]*b[1])) + (a[2]*b[2]); }
+ A_STATIC AD1 ADotD4(inAD4 a, inAD4 b) { return (((a[0]*b[0]) + (a[1]*b[1])) + (a[2]*b[2])) + (a[3]*b[3]); }
+ A_STATIC AF1 ADotF2(inAF2 a, inAF2 b) { return (a[0]*b[0]) + (a[1]*b[1]); }
+ A_STATIC AF1 ADotF3(inAF3 a, inAF3 b) { return ((a[0]*b[0]) + (a[1]*b[1])) + (a[2]*b[2]); }
+ A_STATIC AF1 ADotF4(inAF4 a, inAF4 b) { return (((a[0]*b[0]) + (a[1]*b[1])) + (a[2]*b[2])) + (a[3]*b[3]); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Base-2 exponential for AD1/AF1: exp2/exp2f by default, the compiler builtins when A_GCC is defined.
+ #ifndef A_GCC
+ A_STATIC AD1 AExp2D1(AD1 a) { return exp2(a); }
+ A_STATIC AF1 AExp2F1(AF1 a) { return exp2f(a); }
+ #else
+ A_STATIC AD1 AExp2D1(AD1 a) { return __builtin_exp2(a); }
+ A_STATIC AF1 AExp2F1(AF1 a) { return __builtin_exp2f(a); }
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Floor for AD1/AF1: floor/floorf by default, the compiler builtins when A_GCC is defined.
+ #ifndef A_GCC
+ A_STATIC AD1 AFloorD1(AD1 a) { return floor(a); }
+ A_STATIC AF1 AFloorF1(AF1 a) { return floorf(a); }
+ #else
+ A_STATIC AD1 AFloorD1(AD1 a) { return __builtin_floor(a); }
+ A_STATIC AF1 AFloorF1(AF1 a) { return __builtin_floorf(a); }
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear interpolation from a to b by factor c, kept in the form b*c+(-a*c+a).
+ A_STATIC AD1 ALerpD1(AD1 a, AD1 b, AD1 c) { return (b*c) + (((-a)*c) + a); }
+ A_STATIC AF1 ALerpF1(AF1 a, AF1 b, AF1 c) { return (b*c) + (((-a)*c) + a); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Base-2 logarithm for AD1/AF1: log2/log2f by default, the compiler builtins when A_GCC is defined.
+ #ifndef A_GCC
+ A_STATIC AD1 ALog2D1(AD1 a) { return log2(a); }
+ A_STATIC AF1 ALog2F1(AF1 a) { return log2f(a); }
+ #else
+ A_STATIC AD1 ALog2D1(AD1 a) { return __builtin_log2(a); }
+ A_STATIC AF1 ALog2F1(AF1 a) { return __builtin_log2f(a); }
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Maximum of two scalars: returns a only when a>b holds, otherwise b.
+ A_STATIC AD1 AMaxD1(AD1 a, AD1 b) { if (a > b) return a; return b; }
+ A_STATIC AF1 AMaxF1(AF1 a, AF1 b) { if (a > b) return a; return b; }
+ A_STATIC AL1 AMaxL1(AL1 a, AL1 b) { if (a > b) return a; return b; }
+ A_STATIC AU1 AMaxU1(AU1 a, AU1 b) { if (a > b) return a; return b; }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed maximum on the unsigned carrier types: the operands are compared after conversion to the signed
+ // type, and the selected operand is returned unchanged in its unsigned form.
+ A_STATIC AL1 AMaxSL1(AL1 a, AL1 b) { if (ASL1_(a) > ASL1_(b)) return a; return b; }
+ A_STATIC AU1 AMaxSU1(AU1 a, AU1 b) { if (ASU1_(a) > ASU1_(b)) return a; return b; }
+//------------------------------------------------------------------------------------------------------------------------------
+ // NOTE(review): the original line here was damaged (the text between the '<' of AMinD1 and the '>>' of AShrSL1
+ // was lost). The definitions below restore the functions that later code in this file calls (AMinD1, AMinF1,
+ // ARcpD1, ARcpF1) and mirror the AMax* family for the remaining variants - confirm against the upstream header.
+ // Minimum of two scalars: returns a only when a<b holds, otherwise b.
+ A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a<b?a:b;}
+ A_STATIC AF1 AMinF1(AF1 a,AF1 b){return a<b?a:b;}
+ A_STATIC AL1 AMinL1(AL1 a,AL1 b){return a<b?a:b;}
+ A_STATIC AU1 AMinU1(AU1 a,AU1 b){return a<b?a:b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed minimum on the unsigned carrier types: compare as signed, return the chosen operand unchanged.
+ A_STATIC AL1 AMinSL1(AL1 a,AL1 b){return (ASL1_(a)<ASL1_(b))?a:b;}
+ A_STATIC AU1 AMinSU1(AU1 a,AU1 b){return (ASU1_(a)<ASU1_(b))?a:b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal.
+ A_STATIC AD1 ARcpD1(AD1 a){return 1.0/a;}
+ A_STATIC AF1 ARcpF1(AF1 a){return 1.0f/a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Right shift performed on the operands converted to the signed type; the result is cast back to unsigned.
+ A_STATIC AL1 AShrSL1(AL1 a,AL1 b){return AL1_(ASL1_(a)>>ASL1_(b));}
+ A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Sine for AD1/AF1: sin/sinf by default, the compiler builtins when A_GCC is defined.
+ #ifndef A_GCC
+ A_STATIC AD1 ASinD1(AD1 a) { return sin(a); }
+ A_STATIC AF1 ASinF1(AF1 a) { return sinf(a); }
+ #else
+ A_STATIC AD1 ASinD1(AD1 a) { return __builtin_sin(a); }
+ A_STATIC AF1 ASinF1(AF1 a) { return __builtin_sinf(a); }
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Square root for AD1/AF1: sqrt/sqrtf by default, the compiler builtins when A_GCC is defined.
+ #ifndef A_GCC
+ A_STATIC AD1 ASqrtD1(AD1 a) { return sqrt(a); }
+ A_STATIC AF1 ASqrtF1(AF1 a) { return sqrtf(a); }
+ #else
+ A_STATIC AD1 ASqrtD1(AD1 a) { return __builtin_sqrt(a); }
+ A_STATIC AF1 ASqrtF1(AF1 a) { return __builtin_sqrtf(a); }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ // Clamp: returns max(n, min(x, m)).
+ A_STATIC AD1 AClampD1(AD1 x, AD1 n, AD1 m) { const AD1 capped = AMinD1(x, m); return AMaxD1(n, capped); }
+ A_STATIC AF1 AClampF1(AF1 x, AF1 n, AF1 m) { const AF1 capped = AMinF1(x, m); return AMaxF1(n, capped); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Fractional part: the argument minus its floor.
+ A_STATIC AD1 AFractD1(AD1 a) { const AD1 whole = AFloorD1(a); return a - whole; }
+ A_STATIC AF1 AFractF1(AF1 a) { const AF1 whole = AFloorF1(a); return a - whole; }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Power computed as exp2(b*log2(a)).
+ A_STATIC AD1 APowD1(AD1 a, AD1 b) { const AD1 scaledLog = b * ALog2D1(a); return AExp2D1(scaledLog); }
+ A_STATIC AF1 APowF1(AF1 a, AF1 b) { const AF1 scaledLog = b * ALog2F1(a); return AExp2F1(scaledLog); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal square root: the reciprocal of ASqrt*(a).
+ A_STATIC AD1 ARsqD1(AD1 a) { const AD1 root = ASqrtD1(a); return ARcpD1(root); }
+ A_STATIC AF1 ARsqF1(AF1 a) { const AF1 root = ASqrtF1(a); return ARcpF1(root); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Saturate: returns min(1, max(0, a)).
+ A_STATIC AD1 ASatD1(AD1 a) { const AD1 floored = AMaxD1(0.0, a); return AMinD1(1.0, floored); }
+ A_STATIC AF1 ASatF1(AF1 a) { const AF1 floored = AMaxF1(0.0f, a); return AMinF1(1.0f, floored); }
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ // Component-wise absolute value: d[i]=AAbs*(a[i]) for each component, in index order; returns d.
+ A_STATIC retAD2 opAAbsD2(outAD2 d, inAD2 a) { for (int i = 0; i < 2; ++i) d[i] = AAbsD1(a[i]); return d; }
+ A_STATIC retAD3 opAAbsD3(outAD3 d, inAD3 a) { for (int i = 0; i < 3; ++i) d[i] = AAbsD1(a[i]); return d; }
+ A_STATIC retAD4 opAAbsD4(outAD4 d, inAD4 a) { for (int i = 0; i < 4; ++i) d[i] = AAbsD1(a[i]); return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAbsF2(outAF2 d, inAF2 a) { for (int i = 0; i < 2; ++i) d[i] = AAbsF1(a[i]); return d; }
+ A_STATIC retAF3 opAAbsF3(outAF3 d, inAF3 a) { for (int i = 0; i < 3; ++i) d[i] = AAbsF1(a[i]); return d; }
+ A_STATIC retAF4 opAAbsF4(outAF4 d, inAF4 a) { for (int i = 0; i < 4; ++i) d[i] = AAbsF1(a[i]); return d; }
+//==============================================================================================================================
+ // Component-wise addition: d[i]=a[i]+b[i] for each component, in index order; returns d.
+ A_STATIC retAD2 opAAddD2(outAD2 d, inAD2 a, inAD2 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] + b[i]; return d; }
+ A_STATIC retAD3 opAAddD3(outAD3 d, inAD3 a, inAD3 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] + b[i]; return d; }
+ A_STATIC retAD4 opAAddD4(outAD4 d, inAD4 a, inAD4 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] + b[i]; return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAddF2(outAF2 d, inAF2 a, inAF2 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] + b[i]; return d; }
+ A_STATIC retAF3 opAAddF3(outAF3 d, inAF3 a, inAF3 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] + b[i]; return d; }
+ A_STATIC retAF4 opAAddF4(outAF4 d, inAF4 a, inAF4 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] + b[i]; return d; }
+//==============================================================================================================================
+ // Adds the scalar b to every component: d[i]=a[i]+b, in index order; returns d.
+ A_STATIC retAD2 opAAddOneD2(outAD2 d, inAD2 a, AD1 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] + b; return d; }
+ A_STATIC retAD3 opAAddOneD3(outAD3 d, inAD3 a, AD1 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] + b; return d; }
+ A_STATIC retAD4 opAAddOneD4(outAD4 d, inAD4 a, AD1 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] + b; return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAAddOneF2(outAF2 d, inAF2 a, AF1 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] + b; return d; }
+ A_STATIC retAF3 opAAddOneF3(outAF3 d, inAF3 a, AF1 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] + b; return d; }
+ A_STATIC retAF4 opAAddOneF4(outAF4 d, inAF4 a, AF1 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] + b; return d; }
+//==============================================================================================================================
+ // Component-wise copy: d[i]=a[i] for each component, in index order; returns d.
+ A_STATIC retAD2 opACpyD2(outAD2 d, inAD2 a) { for (int i = 0; i < 2; ++i) d[i] = a[i]; return d; }
+ A_STATIC retAD3 opACpyD3(outAD3 d, inAD3 a) { for (int i = 0; i < 3; ++i) d[i] = a[i]; return d; }
+ A_STATIC retAD4 opACpyD4(outAD4 d, inAD4 a) { for (int i = 0; i < 4; ++i) d[i] = a[i]; return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opACpyF2(outAF2 d, inAF2 a) { for (int i = 0; i < 2; ++i) d[i] = a[i]; return d; }
+ A_STATIC retAF3 opACpyF3(outAF3 d, inAF3 a) { for (int i = 0; i < 3; ++i) d[i] = a[i]; return d; }
+ A_STATIC retAF4 opACpyF4(outAF4 d, inAF4 a) { for (int i = 0; i < 4; ++i) d[i] = a[i]; return d; }
+//==============================================================================================================================
+ // Component-wise interpolation with a per-component factor: d[i]=ALerp*(a[i],b[i],c[i]); returns d.
+ A_STATIC retAD2 opALerpD2(outAD2 d, inAD2 a, inAD2 b, inAD2 c) { for (int i = 0; i < 2; ++i) d[i] = ALerpD1(a[i], b[i], c[i]); return d; }
+ A_STATIC retAD3 opALerpD3(outAD3 d, inAD3 a, inAD3 b, inAD3 c) { for (int i = 0; i < 3; ++i) d[i] = ALerpD1(a[i], b[i], c[i]); return d; }
+ A_STATIC retAD4 opALerpD4(outAD4 d, inAD4 a, inAD4 b, inAD4 c) { for (int i = 0; i < 4; ++i) d[i] = ALerpD1(a[i], b[i], c[i]); return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opALerpF2(outAF2 d, inAF2 a, inAF2 b, inAF2 c) { for (int i = 0; i < 2; ++i) d[i] = ALerpF1(a[i], b[i], c[i]); return d; }
+ A_STATIC retAF3 opALerpF3(outAF3 d, inAF3 a, inAF3 b, inAF3 c) { for (int i = 0; i < 3; ++i) d[i] = ALerpF1(a[i], b[i], c[i]); return d; }
+ A_STATIC retAF4 opALerpF4(outAF4 d, inAF4 a, inAF4 b, inAF4 c) { for (int i = 0; i < 4; ++i) d[i] = ALerpF1(a[i], b[i], c[i]); return d; }
+//==============================================================================================================================
+ // Component-wise interpolation with one scalar factor: d[i]=ALerp*(a[i],b[i],c); returns d.
+ A_STATIC retAD2 opALerpOneD2(outAD2 d, inAD2 a, inAD2 b, AD1 c) { for (int i = 0; i < 2; ++i) d[i] = ALerpD1(a[i], b[i], c); return d; }
+ A_STATIC retAD3 opALerpOneD3(outAD3 d, inAD3 a, inAD3 b, AD1 c) { for (int i = 0; i < 3; ++i) d[i] = ALerpD1(a[i], b[i], c); return d; }
+ A_STATIC retAD4 opALerpOneD4(outAD4 d, inAD4 a, inAD4 b, AD1 c) { for (int i = 0; i < 4; ++i) d[i] = ALerpD1(a[i], b[i], c); return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opALerpOneF2(outAF2 d, inAF2 a, inAF2 b, AF1 c) { for (int i = 0; i < 2; ++i) d[i] = ALerpF1(a[i], b[i], c); return d; }
+ A_STATIC retAF3 opALerpOneF3(outAF3 d, inAF3 a, inAF3 b, AF1 c) { for (int i = 0; i < 3; ++i) d[i] = ALerpF1(a[i], b[i], c); return d; }
+ A_STATIC retAF4 opALerpOneF4(outAF4 d, inAF4 a, inAF4 b, AF1 c) { for (int i = 0; i < 4; ++i) d[i] = ALerpF1(a[i], b[i], c); return d; }
+//==============================================================================================================================
+ // Component-wise maximum: d[i]=AMax*(a[i],b[i]) for each component, in index order; returns d.
+ A_STATIC retAD2 opAMaxD2(outAD2 d, inAD2 a, inAD2 b) { for (int i = 0; i < 2; ++i) d[i] = AMaxD1(a[i], b[i]); return d; }
+ A_STATIC retAD3 opAMaxD3(outAD3 d, inAD3 a, inAD3 b) { for (int i = 0; i < 3; ++i) d[i] = AMaxD1(a[i], b[i]); return d; }
+ A_STATIC retAD4 opAMaxD4(outAD4 d, inAD4 a, inAD4 b) { for (int i = 0; i < 4; ++i) d[i] = AMaxD1(a[i], b[i]); return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMaxF2(outAF2 d, inAF2 a, inAF2 b) { for (int i = 0; i < 2; ++i) d[i] = AMaxF1(a[i], b[i]); return d; }
+ A_STATIC retAF3 opAMaxF3(outAF3 d, inAF3 a, inAF3 b) { for (int i = 0; i < 3; ++i) d[i] = AMaxF1(a[i], b[i]); return d; }
+ A_STATIC retAF4 opAMaxF4(outAF4 d, inAF4 a, inAF4 b) { for (int i = 0; i < 4; ++i) d[i] = AMaxF1(a[i], b[i]); return d; }
+//==============================================================================================================================
+ // Component-wise minimum: d[i]=AMin*(a[i],b[i]) for each component, in index order; returns d.
+ A_STATIC retAD2 opAMinD2(outAD2 d, inAD2 a, inAD2 b) { for (int i = 0; i < 2; ++i) d[i] = AMinD1(a[i], b[i]); return d; }
+ A_STATIC retAD3 opAMinD3(outAD3 d, inAD3 a, inAD3 b) { for (int i = 0; i < 3; ++i) d[i] = AMinD1(a[i], b[i]); return d; }
+ A_STATIC retAD4 opAMinD4(outAD4 d, inAD4 a, inAD4 b) { for (int i = 0; i < 4; ++i) d[i] = AMinD1(a[i], b[i]); return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMinF2(outAF2 d, inAF2 a, inAF2 b) { for (int i = 0; i < 2; ++i) d[i] = AMinF1(a[i], b[i]); return d; }
+ A_STATIC retAF3 opAMinF3(outAF3 d, inAF3 a, inAF3 b) { for (int i = 0; i < 3; ++i) d[i] = AMinF1(a[i], b[i]); return d; }
+ A_STATIC retAF4 opAMinF4(outAF4 d, inAF4 a, inAF4 b) { for (int i = 0; i < 4; ++i) d[i] = AMinF1(a[i], b[i]); return d; }
+//==============================================================================================================================
+ // Component-wise multiplication: d[i]=a[i]*b[i] for each component, in index order; returns d.
+ A_STATIC retAD2 opAMulD2(outAD2 d, inAD2 a, inAD2 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] * b[i]; return d; }
+ A_STATIC retAD3 opAMulD3(outAD3 d, inAD3 a, inAD3 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] * b[i]; return d; }
+ A_STATIC retAD4 opAMulD4(outAD4 d, inAD4 a, inAD4 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] * b[i]; return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMulF2(outAF2 d, inAF2 a, inAF2 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] * b[i]; return d; }
+ A_STATIC retAF3 opAMulF3(outAF3 d, inAF3 a, inAF3 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] * b[i]; return d; }
+ A_STATIC retAF4 opAMulF4(outAF4 d, inAF4 a, inAF4 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] * b[i]; return d; }
+//==============================================================================================================================
+ // Multiplies every component by the scalar b: d[i]=a[i]*b, in index order; returns d.
+ A_STATIC retAD2 opAMulOneD2(outAD2 d, inAD2 a, AD1 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] * b; return d; }
+ A_STATIC retAD3 opAMulOneD3(outAD3 d, inAD3 a, AD1 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] * b; return d; }
+ A_STATIC retAD4 opAMulOneD4(outAD4 d, inAD4 a, AD1 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] * b; return d; }
+//------------------------------------------------------------------------------------------------------------------------------
+ A_STATIC retAF2 opAMulOneF2(outAF2 d, inAF2 a, AF1 b) { for (int i = 0; i < 2; ++i) d[i] = a[i] * b; return d; }
+ A_STATIC retAF3 opAMulOneF3(outAF3 d, inAF3 a, AF1 b) { for (int i = 0; i < 3; ++i) d[i] = a[i] * b; return d; }
+ A_STATIC retAF4 opAMulOneF4(outAF4 d, inAF4 a, AF1 b) { for (int i = 0; i < 4; ++i) d[i] = a[i] * b; return d; }
+//==============================================================================================================================
+ // Per-component negation: dst[i]=-src[i], written in ascending component order. Every variant returns 'dst'.
+ // Double variants.
+ A_STATIC retAD2 opANegD2(outAD2 dst,inAD2 src){
+  dst[0]=-src[0];dst[1]=-src[1];
+  return dst;}
+ A_STATIC retAD3 opANegD3(outAD3 dst,inAD3 src){
+  dst[0]=-src[0];dst[1]=-src[1];dst[2]=-src[2];
+  return dst;}
+ A_STATIC retAD4 opANegD4(outAD4 dst,inAD4 src){
+  dst[0]=-src[0];dst[1]=-src[1];dst[2]=-src[2];dst[3]=-src[3];
+  return dst;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Float variants.
+ A_STATIC retAF2 opANegF2(outAF2 dst,inAF2 src){
+  dst[0]=-src[0];dst[1]=-src[1];
+  return dst;}
+ A_STATIC retAF3 opANegF3(outAF3 dst,inAF3 src){
+  dst[0]=-src[0];dst[1]=-src[1];dst[2]=-src[2];
+  return dst;}
+ A_STATIC retAF4 opANegF4(outAF4 dst,inAF4 src){
+  dst[0]=-src[0];dst[1]=-src[1];dst[2]=-src[2];dst[3]=-src[3];
+  return dst;}
+//==============================================================================================================================
+ // Per-component reciprocal through ARcpD1/ARcpF1, written in ascending component order. Every variant returns 'dst'.
+ // Double variants.
+ A_STATIC retAD2 opARcpD2(outAD2 dst,inAD2 src){
+  dst[0]=ARcpD1(src[0]);dst[1]=ARcpD1(src[1]);
+  return dst;}
+ A_STATIC retAD3 opARcpD3(outAD3 dst,inAD3 src){
+  dst[0]=ARcpD1(src[0]);dst[1]=ARcpD1(src[1]);dst[2]=ARcpD1(src[2]);
+  return dst;}
+ A_STATIC retAD4 opARcpD4(outAD4 dst,inAD4 src){
+  dst[0]=ARcpD1(src[0]);dst[1]=ARcpD1(src[1]);dst[2]=ARcpD1(src[2]);dst[3]=ARcpD1(src[3]);
+  return dst;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Float variants.
+ A_STATIC retAF2 opARcpF2(outAF2 dst,inAF2 src){
+  dst[0]=ARcpF1(src[0]);dst[1]=ARcpF1(src[1]);
+  return dst;}
+ A_STATIC retAF3 opARcpF3(outAF3 dst,inAF3 src){
+  dst[0]=ARcpF1(src[0]);dst[1]=ARcpF1(src[1]);dst[2]=ARcpF1(src[2]);
+  return dst;}
+ A_STATIC retAF4 opARcpF4(outAF4 dst,inAF4 src){
+  dst[0]=ARcpF1(src[0]);dst[1]=ARcpF1(src[1]);dst[2]=ARcpF1(src[2]);dst[3]=ARcpF1(src[3]);
+  return dst;}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// HALF FLOAT PACKING
+//==============================================================================================================================
+ // Convert float to half (in lower 16-bits of output).
+ // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+ // Supports denormals.
+ // Conversion rules are to make computations possibly "safer" on the GPU,
+ // -INF & -NaN -> -65504
+ // +INF & +NaN -> +65504
+ // Both lookup tables are indexed by the top 9 bits of the float (sign bit followed by the 8 exponent bits):
+ //  'base'  holds the half-float sign/exponent bits for that index (0x7bff/0xfbff once the value is out of half range),
+ //  'shift' holds how far the 23 low (mantissa) bits are shifted right before being added to 'base'.
+ // The tables are read-only, so they are declared const.
+ A_STATIC AU1 AU1_AH1_AF1(AF1 f){
+ static const AW1 base[512]={
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100,
+ 0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,
+ 0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
+ 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100,
+ 0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00,
+ 0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,
+ 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff};
+ static const AB1 shift[512]={
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
+ 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
+ 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
+ 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
+ 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+ 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18};
+ // Read the raw bit pattern of the float through a union.
+ union{AF1 f;AU1 u;}bits;bits.f=f;
+ AU1 u=bits.u;
+ // Table index: everything above the 23 low bits, i.e. 0..511.
+ AU1 i=u>>23;
+ // Half sign/exponent from 'base' plus the low 23 bits truncated by the per-exponent shift (0x18 shifts them out fully).
+ return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Used to output packed constant.
+ // Converts both components with AU1_AH1_AF1: a[0] lands in the low 16 bits, a[1] in the high 16 bits.
+ A_STATIC AU1 AU1_AH2_AF2(inAF2 a){
+  AU1 lowHalf=AU1_AH1_AF1(a[0]);
+  AU1 highHalf=AU1_AH1_AF1(a[1])<<16;
+  return lowHalf+highHalf;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+// GLSL
+//
+//
+//==============================================================================================================================
+#if defined(A_GLSL) && defined(A_GPU)
+ // Extension requests for the optional feature sets. Defining A_SKIP_EXT suppresses all of them.
+ #ifndef A_SKIP_EXT
+ // A_HALF: 16-bit storage plus explicit arithmetic types.
+ #ifdef A_HALF
+ #extension GL_EXT_shader_16bit_storage:require
+ #extension GL_EXT_shader_explicit_arithmetic_types:require
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // A_LONG: 64-bit integers and 64-bit integer atomics (the atomics extension is an NV one).
+ #ifdef A_LONG
+ #extension GL_ARB_gpu_shader_int64:require
+ #extension GL_NV_shader_atomic_int64:require
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // A_WAVE: subgroup arithmetic, ballot, quad and shuffle operations.
+ #ifdef A_WAVE
+ #extension GL_KHR_shader_subgroup_arithmetic:require
+ #extension GL_KHR_shader_subgroup_ballot:require
+ #extension GL_KHR_shader_subgroup_quad:require
+ #extension GL_KHR_shader_subgroup_shuffle:require
+ #endif
+ #endif
+//==============================================================================================================================
+ // Portable type names mapped onto GLSL types. The numeric suffix is the component count.
+ // AP*: boolean scalar and vectors.
+ #define AP1 bool
+ #define AP2 bvec2
+ #define AP3 bvec3
+ #define AP4 bvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // AF*: float scalar and vectors.
+ #define AF1 float
+ #define AF2 vec2
+ #define AF3 vec3
+ #define AF4 vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // AU*: unsigned integer scalar and vectors.
+ #define AU1 uint
+ #define AU2 uvec2
+ #define AU3 uvec3
+ #define AU4 uvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // ASU*: signed integer scalar and vectors.
+ #define ASU1 int
+ #define ASU2 ivec2
+ #define ASU3 ivec3
+ #define ASU4 ivec4
+//==============================================================================================================================
+ // Bit-pattern reinterpretation, named <result type>_<source type>. The argument is first converted to the source type.
+ // Unsigned integer bits viewed as float.
+ #define AF1_AU1(x) uintBitsToFloat(AU1(x))
+ #define AF2_AU2(x) uintBitsToFloat(AU2(x))
+ #define AF3_AU3(x) uintBitsToFloat(AU3(x))
+ #define AF4_AU4(x) uintBitsToFloat(AU4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Float bits viewed as unsigned integer.
+ #define AU1_AF1(x) floatBitsToUint(AF1(x))
+ #define AU2_AF2(x) floatBitsToUint(AF2(x))
+ #define AU3_AF3(x) floatBitsToUint(AF3(x))
+ #define AU4_AF4(x) floatBitsToUint(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Packs a single float as a half by pairing it with 0.0 and handing the pair to packHalf2x16.
+ AU1 AU1_AH1_AF1_x(AF1 v){AF2 pair=AF2(v,0.0);return packHalf2x16(pair);}
+ // Macro front-end: converts the argument to AF1 before the call.
+ #define AU1_AH1_AF1(v) AU1_AH1_AF1_x(AF1(v))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Packing of float vectors into one 32-bit word; these are plain aliases of the GLSL built-ins.
+ #define AU1_AH2_AF2 packHalf2x16
+ #define AU1_AW2Unorm_AF2 packUnorm2x16
+ #define AU1_AB4Unorm_AF4 packUnorm4x8
+//------------------------------------------------------------------------------------------------------------------------------
+ // Matching unpack aliases (32-bit word back to a float vector).
+ #define AF2_AH2_AU1 unpackHalf2x16
+ #define AF2_AW2Unorm_AU1 unpackUnorm2x16
+ #define AF4_AB4Unorm_AU1 unpackUnorm4x8
+//==============================================================================================================================
+ // Broadcast helpers: copy one float into every component of the result.
+ AF1 AF1_x(AF1 v){ return AF1(v); }
+ AF2 AF2_x(AF1 v){ return AF2(v,v); }
+ AF3 AF3_x(AF1 v){ return AF3(v,v,v); }
+ AF4 AF4_x(AF1 v){ return AF4(v,v,v,v); }
+ // Macro front-ends: convert the argument to AF1 first, then broadcast.
+ #define AF1_(v) AF1_x(AF1(v))
+ #define AF2_(v) AF2_x(AF1(v))
+ #define AF3_(v) AF3_x(AF1(v))
+ #define AF4_(v) AF4_x(AF1(v))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast helpers: copy one unsigned integer into every component of the result.
+ AU1 AU1_x(AU1 v){ return AU1(v); }
+ AU2 AU2_x(AU1 v){ return AU2(v,v); }
+ AU3 AU3_x(AU1 v){ return AU3(v,v,v); }
+ AU4 AU4_x(AU1 v){ return AU4(v,v,v,v); }
+ // Macro front-ends: convert the argument to AU1 first, then broadcast.
+ #define AU1_(v) AU1_x(AU1(v))
+ #define AU2_(v) AU2_x(AU1(v))
+ #define AU3_(v) AU3_x(AU1(v))
+ #define AU4_(v) AU4_x(AU1(v))
+//==============================================================================================================================
+ // Absolute value of the operand converted to the signed ASU* type; the result is converted back to unsigned.
+ AU1 AAbsSU1(AU1 a){ASU1 s=ASU1(a);return AU1(abs(s));}
+ AU2 AAbsSU2(AU2 a){ASU2 s=ASU2(a);return AU2(abs(s));}
+ AU3 AAbsSU3(AU3 a){ASU3 s=ASU3(a);return AU3(abs(s));}
+ AU4 AAbsSU4(AU4 a){ASU4 s=ASU4(a);return AU4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Bit-field extract: forwards to bitfieldExtract with 'off' as the offset and 'bits' as the width.
+ AU1 ABfe(AU1 src,AU1 off,AU1 bits){return bitfieldExtract(src,ASU1(off),ASU1(bits));}
+ // Bit-field insert by mask: bits of 'ins' where 'mask' is set, bits of 'src' everywhere else.
+ AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));}
+ // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1'.
+ // NOTE(review): this line was garbled (the text between '<' and '>' was lost). The function below is rebuilt from the
+ // comment and its name should be confirmed against the upstream header; further definitions may also have been lost here.
+ AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){return bitfieldInsert(src,ins,0,ASU1(bits));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Right shift performed on the signed ASU* conversions of both operands; the result is converted back to unsigned.
+ AU1 AShrSU1(AU1 a,AU1 b){return AU1(ASU1(a)>>ASU1(b));}
+ AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));}
+ AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));}
+ AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// GLSL BYTE
+//==============================================================================================================================
+ #ifdef A_BYTE
+ // AB*: unsigned 8-bit scalar and vectors.
+ #define AB1 uint8_t
+ #define AB2 u8vec2
+ #define AB3 u8vec3
+ #define AB4 u8vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // ASB*: signed 8-bit scalar and vectors.
+ #define ASB1 int8_t
+ #define ASB2 i8vec2
+ #define ASB3 i8vec3
+ #define ASB4 i8vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast helpers: copy one byte into every component of the result.
+ AB1 AB1_x(AB1 v){ return AB1(v); }
+ AB2 AB2_x(AB1 v){ return AB2(v,v); }
+ AB3 AB3_x(AB1 v){ return AB3(v,v,v); }
+ AB4 AB4_x(AB1 v){ return AB4(v,v,v,v); }
+ // Macro front-ends: convert the argument to AB1 first, then broadcast.
+ #define AB1_(v) AB1_x(AB1(v))
+ #define AB2_(v) AB2_x(AB1(v))
+ #define AB3_(v) AB3_x(AB1(v))
+ #define AB4_(v) AB4_x(AB1(v))
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// GLSL HALF
+//==============================================================================================================================
+ #ifdef A_HALF
+ // 16-bit type names mapped onto GLSL types. The numeric suffix is the component count.
+ // AH*: 16-bit float scalar and vectors.
+ #define AH1 float16_t
+ #define AH2 f16vec2
+ #define AH3 f16vec3
+ #define AH4 f16vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // AW*: unsigned 16-bit integer scalar and vectors.
+ #define AW1 uint16_t
+ #define AW2 u16vec2
+ #define AW3 u16vec3
+ #define AW4 u16vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // ASW*: signed 16-bit integer scalar and vectors.
+ #define ASW1 int16_t
+ #define ASW2 i16vec2
+ #define ASW3 i16vec3
+ #define ASW4 i16vec4
+//==============================================================================================================================
+ // Unpack 32-bit words into 16-bit vectors (two 16-bit values per word).
+ #define AH2_AU1(v) unpackFloat2x16(AU1(v))
+ // The .x word supplies the first two halves of the result, the .y word the last two.
+ AH4 AH4_AU2_x(AU2 p){AH2 lo=unpackFloat2x16(p.x);AH2 hi=unpackFloat2x16(p.y);return AH4(lo,hi);}
+ #define AH4_AU2(v) AH4_AU2_x(AU2(v))
+ #define AW2_AU1(v) unpackUint2x16(AU1(v))
+ #define AW4_AU2(v) unpackUint4x16(pack64(AU2(v)))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Pack 16-bit vectors into 32-bit words (inverse of the macros above).
+ #define AU1_AH2(v) packFloat2x16(AH2(v))
+ // The .xy pair is packed into the first word of the result, the .zw pair into the second.
+ AU2 AU2_AH4_x(AH4 h){AU1 lo=packFloat2x16(h.xy);AU1 hi=packFloat2x16(h.zw);return AU2(lo,hi);}
+ #define AU2_AH4(v) AU2_AH4_x(AH4(v))
+ #define AU1_AW2(v) packUint2x16(AW2(v))
+ #define AU2_AW4(v) unpack32(packUint4x16(AW4(v)))
+//==============================================================================================================================
+ // Reinterpretation between 16-bit floats and 16-bit unsigned integers, named <result type>_<source type>.
+ // The argument is first converted to the source type.
+ // Half to 16-bit unsigned integer, through halfBitsToUint16.
+ #define AW1_AH1(x) halfBitsToUint16(AH1(x))
+ #define AW2_AH2(x) halfBitsToUint16(AH2(x))
+ #define AW3_AH3(x) halfBitsToUint16(AH3(x))
+ #define AW4_AH4(x) halfBitsToUint16(AH4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // 16-bit unsigned integer to half, through uint16BitsToHalf.
+ #define AH1_AW1(x) uint16BitsToHalf(AW1(x))
+ #define AH2_AW2(x) uint16BitsToHalf(AW2(x))
+ #define AH3_AW3(x) uint16BitsToHalf(AW3(x))
+ #define AH4_AW4(x) uint16BitsToHalf(AW4(x))
+//==============================================================================================================================
+ // Broadcast helpers: copy one half float into every component of the result.
+ AH1 AH1_x(AH1 v){ return AH1(v); }
+ AH2 AH2_x(AH1 v){ return AH2(v,v); }
+ AH3 AH3_x(AH1 v){ return AH3(v,v,v); }
+ AH4 AH4_x(AH1 v){ return AH4(v,v,v,v); }
+ // Macro front-ends: convert the argument to AH1 first, then broadcast.
+ #define AH1_(v) AH1_x(AH1(v))
+ #define AH2_(v) AH2_x(AH1(v))
+ #define AH3_(v) AH3_x(AH1(v))
+ #define AH4_(v) AH4_x(AH1(v))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Same pattern for 16-bit unsigned integers.
+ AW1 AW1_x(AW1 v){ return AW1(v); }
+ AW2 AW2_x(AW1 v){ return AW2(v,v); }
+ AW3 AW3_x(AW1 v){ return AW3(v,v,v); }
+ AW4 AW4_x(AW1 v){ return AW4(v,v,v,v); }
+ #define AW1_(v) AW1_x(AW1(v))
+ #define AW2_(v) AW2_x(AW1(v))
+ #define AW3_(v) AW3_x(AW1(v))
+ #define AW4_(v) AW4_x(AW1(v))
+//==============================================================================================================================
+ // Absolute value of the operand converted to the signed ASW* type; the result is converted back to unsigned.
+ AW1 AAbsSW1(AW1 a){ASW1 s=ASW1(a);return AW1(abs(s));}
+ AW2 AAbsSW2(AW2 a){ASW2 s=ASW2(a);return AW2(abs(s));}
+ AW3 AAbsSW3(AW3 a){ASW3 s=ASW3(a);return AW3(abs(s));}
+ AW4 AAbsSW4(AW4 a){ASW4 s=ASW4(a);return AW4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Clamp 'v' between 'lo' and 'hi' (thin wrapper over clamp).
+ AH1 AClampH1(AH1 v,AH1 lo,AH1 hi){ return clamp(v,lo,hi); }
+ AH2 AClampH2(AH2 v,AH2 lo,AH2 hi){ return clamp(v,lo,hi); }
+ AH3 AClampH3(AH3 v,AH3 lo,AH3 hi){ return clamp(v,lo,hi); }
+ AH4 AClampH4(AH4 v,AH4 lo,AH4 hi){ return clamp(v,lo,hi); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Fractional part (thin wrapper over fract).
+ AH1 AFractH1(AH1 v){ return fract(v); }
+ AH2 AFractH2(AH2 v){ return fract(v); }
+ AH3 AFractH3(AH3 v){ return fract(v); }
+ AH4 AFractH4(AH4 v){ return fract(v); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Interpolation between 'from' and 'to' by weight 't' (thin wrapper over mix).
+ AH1 ALerpH1(AH1 from,AH1 to,AH1 t){ return mix(from,to,t); }
+ AH2 ALerpH2(AH2 from,AH2 to,AH2 t){ return mix(from,to,t); }
+ AH3 ALerpH3(AH3 from,AH3 to,AH3 t){ return mix(from,to,t); }
+ AH4 ALerpH4(AH4 from,AH4 to,AH4 t){ return mix(from,to,t); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // No packed version of max3.
+ // Built from two nested max calls: the last two operands first, then the first operand.
+ AH1 AMax3H1(AH1 p,AH1 q,AH1 r){ return max(p,max(q,r)); }
+ AH2 AMax3H2(AH2 p,AH2 q,AH2 r){ return max(p,max(q,r)); }
+ AH3 AMax3H3(AH3 p,AH3 q,AH3 r){ return max(p,max(q,r)); }
+ AH4 AMax3H4(AH4 p,AH4 q,AH4 r){ return max(p,max(q,r)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed maximum of 16-bit values carried in unsigned (AW*) containers.
+ // The operands are converted with the 16-bit signed ASW* types, as AAbsSW*/AShrSW* do. Widening to the 32-bit ASU*
+ // types would zero-extend, so 0x8000..0xffff would compare as large positive values instead of negative ones.
+ AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));}
+ AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));}
+ AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));}
+ AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // No packed version of min3.
+ // Built from two nested min calls: the last two operands first, then the first operand.
+ AH1 AMin3H1(AH1 p,AH1 q,AH1 r){ return min(p,min(q,r)); }
+ AH2 AMin3H2(AH2 p,AH2 q,AH2 r){ return min(p,min(q,r)); }
+ AH3 AMin3H3(AH3 p,AH3 q,AH3 r){ return min(p,min(q,r)); }
+ AH4 AMin3H4(AH4 p,AH4 q,AH4 r){ return min(p,min(q,r)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed minimum of 16-bit values carried in unsigned (AW*) containers.
+ // The operands are converted with the 16-bit signed ASW* types, as AAbsSW*/AShrSW* do. Widening to the 32-bit ASU*
+ // types would zero-extend, so 0x8000..0xffff would compare as large positive values instead of negative ones.
+ AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));}
+ AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));}
+ AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));}
+ AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal, written as a division of a broadcast 1.0 by the operand.
+ AH1 ARcpH1(AH1 v){ return AH1_(1.0)/v; }
+ AH2 ARcpH2(AH2 v){ return AH2_(1.0)/v; }
+ AH3 ARcpH3(AH3 v){ return AH3_(1.0)/v; }
+ AH4 ARcpH4(AH4 v){ return AH4_(1.0)/v; }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal square root, written as a division of a broadcast 1.0 by sqrt of the operand.
+ AH1 ARsqH1(AH1 v){ return AH1_(1.0)/sqrt(v); }
+ AH2 ARsqH2(AH2 v){ return AH2_(1.0)/sqrt(v); }
+ AH3 ARsqH3(AH3 v){ return AH3_(1.0)/sqrt(v); }
+ AH4 ARsqH4(AH4 v){ return AH4_(1.0)/sqrt(v); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Saturate: clamp to the range 0.0 to 1.0.
+ AH1 ASatH1(AH1 v){ return clamp(v,AH1_(0.0),AH1_(1.0)); }
+ AH2 ASatH2(AH2 v){ return clamp(v,AH2_(0.0),AH2_(1.0)); }
+ AH3 ASatH3(AH3 v){ return clamp(v,AH3_(0.0),AH3_(1.0)); }
+ AH4 ASatH4(AH4 v){ return clamp(v,AH4_(0.0),AH4_(1.0)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Right shift performed on the signed ASW* conversions of both operands; the result is converted back to unsigned.
+ AW1 AShrSW1(AW1 val,AW1 amt){ return AW1(ASW1(val)>>ASW1(amt)); }
+ AW2 AShrSW2(AW2 val,AW2 amt){ return AW2(ASW2(val)>>ASW2(amt)); }
+ AW3 AShrSW3(AW3 val,AW3 amt){ return AW3(ASW3(val)>>ASW3(amt)); }
+ AW4 AShrSW4(AW4 val,AW4 amt){ return AW4(ASW4(val)>>ASW4(amt)); }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// GLSL DOUBLE
+//==============================================================================================================================
+ #ifdef A_DUBL
+ // AD*: double scalar and vectors.
+ #define AD1 double
+ #define AD2 dvec2
+ #define AD3 dvec3
+ #define AD4 dvec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast helpers: copy one double into every component of the result.
+ AD1 AD1_x(AD1 v){ return AD1(v); }
+ AD2 AD2_x(AD1 v){ return AD2(v,v); }
+ AD3 AD3_x(AD1 v){ return AD3(v,v,v); }
+ AD4 AD4_x(AD1 v){ return AD4(v,v,v,v); }
+ // Macro front-ends: convert the argument to AD1 first, then broadcast.
+ #define AD1_(v) AD1_x(AD1(v))
+ #define AD2_(v) AD2_x(AD1(v))
+ #define AD3_(v) AD3_x(AD1(v))
+ #define AD4_(v) AD4_x(AD1(v))
+//==============================================================================================================================
+ // Fractional part (thin wrapper over fract).
+ AD1 AFractD1(AD1 v){ return fract(v); }
+ AD2 AFractD2(AD2 v){ return fract(v); }
+ AD3 AFractD3(AD3 v){ return fract(v); }
+ AD4 AFractD4(AD4 v){ return fract(v); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Interpolation between 'from' and 'to' by weight 't' (thin wrapper over mix).
+ AD1 ALerpD1(AD1 from,AD1 to,AD1 t){ return mix(from,to,t); }
+ AD2 ALerpD2(AD2 from,AD2 to,AD2 t){ return mix(from,to,t); }
+ AD3 ALerpD3(AD3 from,AD3 to,AD3 t){ return mix(from,to,t); }
+ AD4 ALerpD4(AD4 from,AD4 to,AD4 t){ return mix(from,to,t); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal, written as a division of a broadcast 1.0 by the operand.
+ AD1 ARcpD1(AD1 v){ return AD1_(1.0)/v; }
+ AD2 ARcpD2(AD2 v){ return AD2_(1.0)/v; }
+ AD3 ARcpD3(AD3 v){ return AD3_(1.0)/v; }
+ AD4 ARcpD4(AD4 v){ return AD4_(1.0)/v; }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal square root, written as a division of a broadcast 1.0 by sqrt of the operand.
+ AD1 ARsqD1(AD1 v){ return AD1_(1.0)/sqrt(v); }
+ AD2 ARsqD2(AD2 v){ return AD2_(1.0)/sqrt(v); }
+ AD3 ARsqD3(AD3 v){ return AD3_(1.0)/sqrt(v); }
+ AD4 ARsqD4(AD4 v){ return AD4_(1.0)/sqrt(v); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Saturate: clamp to the range 0.0 to 1.0.
+ AD1 ASatD1(AD1 v){ return clamp(v,AD1_(0.0),AD1_(1.0)); }
+ AD2 ASatD2(AD2 v){ return clamp(v,AD2_(0.0),AD2_(1.0)); }
+ AD3 ASatD3(AD3 v){ return clamp(v,AD3_(0.0),AD3_(1.0)); }
+ AD4 ASatD4(AD4 v){ return clamp(v,AD4_(0.0),AD4_(1.0)); }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// GLSL LONG
+//==============================================================================================================================
+ #ifdef A_LONG
+ // 64-bit integer type names mapped onto GLSL types. The numeric suffix is the component count.
+ // AL*: unsigned 64-bit integer scalar and vectors.
+ #define AL1 uint64_t
+ #define AL2 u64vec2
+ #define AL3 u64vec3
+ #define AL4 u64vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // ASL*: signed 64-bit integer scalar and vectors.
+ #define ASL1 int64_t
+ #define ASL2 i64vec2
+ #define ASL3 i64vec3
+ #define ASL4 i64vec4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Conversion between one 64-bit value and a pair of 32-bit words, via packUint2x32/unpackUint2x32.
+ #define AL1_AU2(x) packUint2x32(AU2(x))
+ #define AU2_AL1(x) unpackUint2x32(AL1(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast helpers: copy one 64-bit unsigned integer into every component of the result.
+ AL1 AL1_x(AL1 v){ return AL1(v); }
+ AL2 AL2_x(AL1 v){ return AL2(v,v); }
+ AL3 AL3_x(AL1 v){ return AL3(v,v,v); }
+ AL4 AL4_x(AL1 v){ return AL4(v,v,v,v); }
+ // Macro front-ends: convert the argument to AL1 first, then broadcast.
+ #define AL1_(v) AL1_x(AL1(v))
+ #define AL2_(v) AL2_x(AL1(v))
+ #define AL3_(v) AL3_x(AL1(v))
+ #define AL4_(v) AL4_x(AL1(v))
+//==============================================================================================================================
+ // Absolute value of the operand converted to the signed ASL* type; the result is converted back to unsigned.
+ AL1 AAbsSL1(AL1 a){ASL1 s=ASL1(a);return AL1(abs(s));}
+ AL2 AAbsSL2(AL2 a){ASL2 s=ASL2(a);return AL2(abs(s));}
+ AL3 AAbsSL3(AL3 a){ASL3 s=ASL3(a);return AL3(abs(s));}
+ AL4 AAbsSL4(AL4 a){ASL4 s=ASL4(a);return AL4(abs(s));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed maximum of 64-bit values carried in unsigned (AL*) containers.
+ // The operands are converted with the 64-bit signed ASL* types, as AAbsSL* does. Converting through the 32-bit ASU*
+ // types would not preserve the full 64-bit value, so the comparison would only see part of each operand.
+ AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASL1(a),ASL1(b)));}
+ AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASL2(a),ASL2(b)));}
+ AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASL3(a),ASL3(b)));}
+ AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASL4(a),ASL4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed minimum of 64-bit values carried in unsigned (AL*) containers.
+ // The operands are converted with the 64-bit signed ASL* types, as AAbsSL* does. Converting through the 32-bit ASU*
+ // types would not preserve the full 64-bit value, so the comparison would only see part of each operand.
+ AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASL1(a),ASL1(b)));}
+ AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASL2(a),ASL2(b)));}
+ AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASL3(a),ASL3(b)));}
+ AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASL4(a),ASL4(b)));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// WAVE OPERATIONS
+//==============================================================================================================================
+ #ifdef A_WAVE
+ // Thin wrappers over subgroupShuffleXor for float and unsigned integer values.
+ // Where 'laneXor' must be a compile time literal.
+ AF1 AWaveXorF1(AF1 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AF2 AWaveXorF2(AF2 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AF3 AWaveXorF3(AF3 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AF4 AWaveXorF4(AF4 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AU1 AWaveXorU1(AU1 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AU2 AWaveXorU2(AU2 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AU3 AWaveXorU3(AU3 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+ AU4 AWaveXorU4(AU4 v,AU1 laneXor){ return subgroupShuffleXor(v,laneXor); }
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_HALF
+ // 16-bit vectors are packed into 32-bit words, shuffled with subgroupShuffleXor, then unpacked again.
+ AH2 AWaveXorH2(AH2 v,AU1 x){AU1 word=AU1_AH2(v);return AH2_AU1(subgroupShuffleXor(word,x));}
+ AH4 AWaveXorH4(AH4 v,AU1 x){AU2 words=AU2_AH4(v);return AH4_AU2(subgroupShuffleXor(words,x));}
+ AW2 AWaveXorW2(AW2 v,AU1 x){AU1 word=AU1_AW2(v);return AW2_AU1(subgroupShuffleXor(word,x));}
+ AW4 AWaveXorW4(AW4 v,AU1 x){AU2 words=AU2_AW4(v);return AW4_AU2(subgroupShuffleXor(words,x));}
+ #endif
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+// HLSL
+//
+//
+//==============================================================================================================================
+#if defined(A_HLSL) && defined(A_GPU)
+ // Base type aliases used by the rest of the header: AP*=bool, AF*=float, AU*=unsigned, ASU*=signed,
+ // with the trailing digit giving the component count.
+ // With A_HLSL_6_2 the explicitly sized 32-bit type names are used.
+ #ifdef A_HLSL_6_2
+ #define AP1 bool
+ #define AP2 bool2
+ #define AP3 bool3
+ #define AP4 bool4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AF1 float32_t
+ #define AF2 float32_t2
+ #define AF3 float32_t3
+ #define AF4 float32_t4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1 uint32_t
+ #define AU2 uint32_t2
+ #define AU3 uint32_t3
+ #define AU4 uint32_t4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASU1 int32_t
+ #define ASU2 int32_t2
+ #define ASU3 int32_t3
+ #define ASU4 int32_t4
+ #else
+ // Without A_HLSL_6_2 the classic float/uint/int type names are used instead.
+ #define AP1 bool
+ #define AP2 bool2
+ #define AP3 bool3
+ #define AP4 bool4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AF1 float
+ #define AF2 float2
+ #define AF3 float3
+ #define AF4 float4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AU1 uint
+ #define AU2 uint2
+ #define AU3 uint3
+ #define AU4 uint4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASU1 int
+ #define ASU2 int2
+ #define ASU3 int3
+ #define ASU4 int4
+ #endif
+//==============================================================================================================================
+ // Reinterpret unsigned integer bits as float via asfloat() (the argument is first converted to AU*).
+ #define AF1_AU1(x) asfloat(AU1(x))
+ #define AF2_AU2(x) asfloat(AU2(x))
+ #define AF3_AU3(x) asfloat(AU3(x))
+ #define AF4_AU4(x) asfloat(AU4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reinterpret float bits as unsigned integer via asuint() (the argument is first converted to AF*).
+ #define AU1_AF1(x) asuint(AF1(x))
+ #define AU2_AF2(x) asuint(AF2(x))
+ #define AU3_AF3(x) asuint(AF3(x))
+ #define AU4_AF4(x) asuint(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Convert one 32-bit float to half-float bits (result of f32tof16).
+ AU1 AU1_AH1_AF1_x(AF1 a)
+ {
+  return f32tof16(a);
+ }
+ #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Convert two 32-bit floats to half-float bits and pack them: a.x in bits 0..15, a.y in bits 16..31.
+ AU1 AU1_AH2_AF2_x(AF2 a)
+ {
+  AU1 lo = f32tof16(a.x);
+  AU1 hi = f32tof16(a.y) << 16;
+  return lo | hi;
+ }
+ #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a))
+ // Forwards the four floats to the D3DCOLORtoUBYTE4() intrinsic.
+ #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Unpack two half-float bit patterns from one 32-bit word into two 32-bit floats (low half first).
+ AF2 AF2_AH2_AU1_x(AU1 x)
+ {
+  return AF2(f16tof32(x & 0xFFFF), f16tof32(x >> 16));
+ }
+ #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x))
+//==============================================================================================================================
+ // Broadcast a scalar float into a 1..4 component float vector.
+ AF1 AF1_x(AF1 a) { return AF1(a); }
+ AF2 AF2_x(AF1 a) { return AF2(a, a); }
+ AF3 AF3_x(AF1 a) { return AF3(a, a, a); }
+ AF4 AF4_x(AF1 a) { return AF4(a, a, a, a); }
+ // Macro front ends: convert the argument to AF1 first, then broadcast.
+ #define AF1_(a) AF1_x(AF1(a))
+ #define AF2_(a) AF2_x(AF1(a))
+ #define AF3_(a) AF3_x(AF1(a))
+ #define AF4_(a) AF4_x(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast a scalar unsigned integer into a 1..4 component unsigned vector.
+ AU1 AU1_x(AU1 a) { return AU1(a); }
+ AU2 AU2_x(AU1 a) { return AU2(a, a); }
+ AU3 AU3_x(AU1 a) { return AU3(a, a, a); }
+ AU4 AU4_x(AU1 a) { return AU4(a, a, a, a); }
+ // Macro front ends: convert the argument to AU1 first, then broadcast.
+ #define AU1_(a) AU1_x(AU1(a))
+ #define AU2_(a) AU2_x(AU1(a))
+ #define AU3_(a) AU3_x(AU1(a))
+ #define AU4_(a) AU4_x(AU1(a))
+//==============================================================================================================================
+ // Absolute value of unsigned storage treated as signed: convert to ASU*, abs(), convert back to AU*.
+ AU1 AAbsSU1(AU1 a) { return AU1(abs(ASU1(a))); }
+ AU2 AAbsSU2(AU2 a) { return AU2(abs(ASU2(a))); }
+ AU3 AAbsSU3(AU3 a) { return AU3(abs(ASU3(a))); }
+ AU4 AAbsSU4(AU4 a) { return AU4(abs(ASU4(a))); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Bit-field extract: returns the 'bits'-wide field of 'src' that starts at bit offset 'off'.
+ // The mask is built as (1<<bits)-1.
+ AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 mask=(1u<<bits)-1u;return (src>>off)&mask;}
+ // Bit-field insert with an explicit mask: bits of 'ins' where 'mask' is set, bits of 'src' elsewhere.
+ AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));}
+ // Bit-field insert of the low 'bits' bits of 'ins' into 'src' (mask built as (1<<bits)-1).
+ AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 mask=(1u<<bits)-1u;return (ins&mask)|(src&(~mask));}
+ // NOTE(review): this copy had the text between "(1u<" and ">ASU1(b));}" stripped; the shifts above and the
+ // AShrSU1 header below were reconstructed. Further definitions may have been lost in the same gap
+ // (e.g. the ASatF* helpers used later are not defined in this section) - confirm against the upstream header.
+//------------------------------------------------------------------------------------------------------------------------------
+ // Shift right performed on the signed reinterpretation (ASU*) of unsigned storage.
+ AU1 AShrSU1(AU1 a,AU1 b){return AU1(ASU1(a)>>ASU1(b));}
+ AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));}
+ AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));}
+ AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// HLSL BYTE
+//==============================================================================================================================
+ #ifdef A_BYTE
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// HLSL HALF
+//==============================================================================================================================
+ #ifdef A_HALF
+ // 16-bit type aliases: AH*=half float, AW*=unsigned word, ASW*=signed word.
+ // With A_HLSL_6_2 the explicitly sized 16-bit type names are used.
+ #ifdef A_HLSL_6_2
+ #define AH1 float16_t
+ #define AH2 float16_t2
+ #define AH3 float16_t3
+ #define AH4 float16_t4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AW1 uint16_t
+ #define AW2 uint16_t2
+ #define AW3 uint16_t3
+ #define AW4 uint16_t4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASW1 int16_t
+ #define ASW2 int16_t2
+ #define ASW3 int16_t3
+ #define ASW4 int16_t4
+ #else
+ // Without A_HLSL_6_2 the min16* minimum-precision types are used instead.
+ #define AH1 min16float
+ #define AH2 min16float2
+ #define AH3 min16float3
+ #define AH4 min16float4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AW1 min16uint
+ #define AW2 min16uint2
+ #define AW3 min16uint3
+ #define AW4 min16uint4
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASW1 min16int
+ #define ASW2 min16int2
+ #define ASW3 min16int3
+ #define ASW4 min16int4
+ #endif
+//==============================================================================================================================
+ // Packed 16-bit data is unpacked by hand (packed types are not used in buffers directly) for optimal execution.
+ // The unpack has to follow this pattern: https://gpuopen.com/first-steps-implementing-fp16/
+ AH2 AH2_AU1_x(AU1 x)
+ {
+  AF2 t = f16tof32(AU2(x & 0xFFFF, x >> 16));
+  return AH2(t);
+ }
+ AH4 AH4_AU2_x(AU2 x) { return AH4(AH2_AU1_x(x.x), AH2_AU1_x(x.y)); }
+ // Split a 32-bit word into two 16-bit words (low half first).
+ AW2 AW2_AU1_x(AU1 x)
+ {
+  AU2 t = AU2(x & 0xFFFF, x >> 16);
+  return AW2(t);
+ }
+ AW4 AW4_AU2_x(AU2 x) { return AW4(AW2_AU1_x(x.x), AW2_AU1_x(x.y)); }
+ #define AH2_AU1(x) AH2_AU1_x(AU1(x))
+ #define AH4_AU2(x) AH4_AU2_x(AU2(x))
+ #define AW2_AU1(x) AW2_AU1_x(AU1(x))
+ #define AW4_AU2(x) AW4_AU2_x(AU2(x))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Pack two halves (via f32tof16) or two words into one 32-bit word: .x in the low 16 bits, .y in the high 16 bits.
+ AU1 AU1_AH2_x(AH2 x) { return f32tof16(x.x) + (f32tof16(x.y) << 16); }
+ AU2 AU2_AH4_x(AH4 x) { return AU2(AU1_AH2_x(x.xy), AU1_AH2_x(x.zw)); }
+ AU1 AU1_AW2_x(AW2 x) { return AU1(x.x) + (AU1(x.y) << 16); }
+ AU2 AU2_AW4_x(AW4 x) { return AU2(AU1_AW2_x(x.xy), AU1_AW2_x(x.zw)); }
+ #define AU1_AH2(x) AU1_AH2_x(AH2(x))
+ #define AU2_AH4(x) AU2_AH4_x(AH4(x))
+ #define AU1_AW2(x) AU1_AW2_x(AW2(x))
+ #define AU2_AW4(x) AU2_AW4_x(AW4(x))
+//==============================================================================================================================
+ // Half float -> 16-bit word casts.
+ // Uses asuint16() when A_HLSL_6_2 is defined and A_NO_16_BIT_CAST is not.
+ #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST)
+ #define AW1_AH1(x) asuint16(x)
+ #define AW2_AH2(x) asuint16(x)
+ #define AW3_AH3(x) asuint16(x)
+ #define AW4_AH4(x) asuint16(x)
+ #else
+ // Fallback: widen to AF1 and go through f32tof16(); vectors are handled one component at a time.
+ #define AW1_AH1(a) AW1(f32tof16(AF1(a)))
+ #define AW2_AH2(a) AW2(AW1_AH1((a).x),AW1_AH1((a).y))
+ #define AW3_AH3(a) AW3(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z))
+ #define AW4_AH4(a) AW4(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z),AW1_AH1((a).w))
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // 16-bit word -> half float casts.
+ // Uses asfloat16() when A_HLSL_6_2 is defined and A_NO_16_BIT_CAST is not.
+ #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST)
+ #define AH1_AW1(x) asfloat16(x)
+ #define AH2_AW2(x) asfloat16(x)
+ #define AH3_AW3(x) asfloat16(x)
+ #define AH4_AW4(x) asfloat16(x)
+ #else
+ // Fallback: widen to AU1 and go through f16tof32(); vectors are handled one component at a time.
+ #define AH1_AW1(a) AH1(f16tof32(AU1(a)))
+ #define AH2_AW2(a) AH2(AH1_AW1((a).x),AH1_AW1((a).y))
+ #define AH3_AW3(a) AH3(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z))
+ #define AH4_AW4(a) AH4(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z),AH1_AW1((a).w))
+ #endif
+//==============================================================================================================================
+ // Broadcast a scalar half into a 1..4 component half vector.
+ AH1 AH1_x(AH1 a) { return AH1(a); }
+ AH2 AH2_x(AH1 a) { return AH2(a, a); }
+ AH3 AH3_x(AH1 a) { return AH3(a, a, a); }
+ AH4 AH4_x(AH1 a) { return AH4(a, a, a, a); }
+ // Macro front ends: convert the argument to AH1 first, then broadcast.
+ #define AH1_(a) AH1_x(AH1(a))
+ #define AH2_(a) AH2_x(AH1(a))
+ #define AH3_(a) AH3_x(AH1(a))
+ #define AH4_(a) AH4_x(AH1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast a scalar 16-bit word into a 1..4 component word vector.
+ AW1 AW1_x(AW1 a) { return AW1(a); }
+ AW2 AW2_x(AW1 a) { return AW2(a, a); }
+ AW3 AW3_x(AW1 a) { return AW3(a, a, a); }
+ AW4 AW4_x(AW1 a) { return AW4(a, a, a, a); }
+ // Macro front ends: convert the argument to AW1 first, then broadcast.
+ #define AW1_(a) AW1_x(AW1(a))
+ #define AW2_(a) AW2_x(AW1(a))
+ #define AW3_(a) AW3_x(AW1(a))
+ #define AW4_(a) AW4_x(AW1(a))
+//==============================================================================================================================
+ // Absolute value of word storage treated as signed: convert to ASW*, abs(), convert back to AW*.
+ AW1 AAbsSW1(AW1 a) { return AW1(abs(ASW1(a))); }
+ AW2 AAbsSW2(AW2 a) { return AW2(abs(ASW2(a))); }
+ AW3 AAbsSW3(AW3 a) { return AW3(abs(ASW3(a))); }
+ AW4 AAbsSW4(AW4 a) { return AW4(abs(ASW4(a))); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Clamp 'x' to [n,m], written as max(n,min(x,m)).
+ AH1 AClampH1(AH1 x, AH1 n, AH1 m) { return max(n, min(x, m)); }
+ AH2 AClampH2(AH2 x, AH2 n, AH2 m) { return max(n, min(x, m)); }
+ AH3 AClampH3(AH3 x, AH3 n, AH3 m) { return max(n, min(x, m)); }
+ AH4 AClampH4(AH4 x, AH4 n, AH4 m) { return max(n, min(x, m)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Fractional part as x-floor(x), i.e. V_FRACT_F16 (DX frac() behaves differently).
+ AH1 AFractH1(AH1 x) { return x - floor(x); }
+ AH2 AFractH2(AH2 x) { return x - floor(x); }
+ AH3 AFractH3(AH3 x) { return x - floor(x); }
+ AH4 AFractH4(AH4 x) { return x - floor(x); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear interpolation, forwarded to lerp().
+ AH1 ALerpH1(AH1 x, AH1 y, AH1 a) { return lerp(x, y, a); }
+ AH2 ALerpH2(AH2 x, AH2 y, AH2 a) { return lerp(x, y, a); }
+ AH3 ALerpH3(AH3 x, AH3 y, AH3 a) { return lerp(x, y, a); }
+ AH4 ALerpH4(AH4 x, AH4 y, AH4 a) { return lerp(x, y, a); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Maximum of three values.
+ AH1 AMax3H1(AH1 x, AH1 y, AH1 z) { return max(x, max(y, z)); }
+ AH2 AMax3H2(AH2 x, AH2 y, AH2 z) { return max(x, max(y, z)); }
+ AH3 AMax3H3(AH3 x, AH3 y, AH3 z) { return max(x, max(y, z)); }
+ AH4 AMax3H4(AH4 x, AH4 y, AH4 z) { return max(x, max(y, z)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed maximum on 16-bit word storage: compare as signed words (ASW*), return as unsigned words (AW*).
+ // The compare goes through ASW* (as AAbsSW*/AShrSW* do), not the 32-bit ASU* types, so the words are
+ // interpreted as signed at their own width.
+ AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASW1(a),ASW1(b)));}
+ AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASW2(a),ASW2(b)));}
+ AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASW3(a),ASW3(b)));}
+ AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Minimum of three values.
+ AH1 AMin3H1(AH1 x, AH1 y, AH1 z) { return min(x, min(y, z)); }
+ AH2 AMin3H2(AH2 x, AH2 y, AH2 z) { return min(x, min(y, z)); }
+ AH3 AMin3H3(AH3 x, AH3 y, AH3 z) { return min(x, min(y, z)); }
+ AH4 AMin3H4(AH4 x, AH4 y, AH4 z) { return min(x, min(y, z)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Signed minimum on 16-bit word storage: compare as signed words (ASW*), return as unsigned words (AW*).
+ // The compare goes through ASW* (as AAbsSW*/AShrSW* do), not the 32-bit ASU* types, so the words are
+ // interpreted as signed at their own width.
+ AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASW1(a),ASW1(b)));}
+ AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASW2(a),ASW2(b)));}
+ AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASW3(a),ASW3(b)));}
+ AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASW4(a),ASW4(b)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal, forwarded to rcp().
+ AH1 ARcpH1(AH1 x) { return rcp(x); }
+ AH2 ARcpH2(AH2 x) { return rcp(x); }
+ AH3 ARcpH3(AH3 x) { return rcp(x); }
+ AH4 ARcpH4(AH4 x) { return rcp(x); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal square root, forwarded to rsqrt().
+ AH1 ARsqH1(AH1 x) { return rsqrt(x); }
+ AH2 ARsqH2(AH2 x) { return rsqrt(x); }
+ AH3 ARsqH3(AH3 x) { return rsqrt(x); }
+ AH4 ARsqH4(AH4 x) { return rsqrt(x); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Saturate, forwarded to saturate().
+ AH1 ASatH1(AH1 x) { return saturate(x); }
+ AH2 ASatH2(AH2 x) { return saturate(x); }
+ AH3 ASatH3(AH3 x) { return saturate(x); }
+ AH4 ASatH4(AH4 x) { return saturate(x); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Shift right performed on the signed reinterpretation (ASW*) of word storage.
+ AW1 AShrSW1(AW1 a, AW1 b) { return AW1(ASW1(a) >> ASW1(b)); }
+ AW2 AShrSW2(AW2 a, AW2 b) { return AW2(ASW2(a) >> ASW2(b)); }
+ AW3 AShrSW3(AW3 a, AW3 b) { return AW3(ASW3(a) >> ASW3(b)); }
+ AW4 AShrSW4(AW4 a, AW4 b) { return AW4(ASW4(a) >> ASW4(b)); }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// HLSL DOUBLE
+//==============================================================================================================================
+ #ifdef A_DUBL
+ // Double precision type aliases (AD*): float64_t names with A_HLSL_6_2, the classic double names otherwise.
+ #ifdef A_HLSL_6_2
+ #define AD1 float64_t
+ #define AD2 float64_t2
+ #define AD3 float64_t3
+ #define AD4 float64_t4
+ #else
+ #define AD1 double
+ #define AD2 double2
+ #define AD3 double3
+ #define AD4 double4
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ // Broadcast a scalar double into a 1..4 component double vector.
+ AD1 AD1_x(AD1 a) { return AD1(a); }
+ AD2 AD2_x(AD1 a) { return AD2(a, a); }
+ AD3 AD3_x(AD1 a) { return AD3(a, a, a); }
+ AD4 AD4_x(AD1 a) { return AD4(a, a, a, a); }
+ // Macro front ends: convert the argument to AD1 first, then broadcast.
+ #define AD1_(a) AD1_x(AD1(a))
+ #define AD2_(a) AD2_x(AD1(a))
+ #define AD3_(a) AD3_x(AD1(a))
+ #define AD4_(a) AD4_x(AD1(a))
+//==============================================================================================================================
+ // Fractional part computed as a-floor(a).
+ AD1 AFractD1(AD1 a) { return a - floor(a); }
+ AD2 AFractD2(AD2 a) { return a - floor(a); }
+ AD3 AFractD3(AD3 a) { return a - floor(a); }
+ AD4 AFractD4(AD4 a) { return a - floor(a); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear interpolation, forwarded to lerp().
+ AD1 ALerpD1(AD1 x, AD1 y, AD1 a) { return lerp(x, y, a); }
+ AD2 ALerpD2(AD2 x, AD2 y, AD2 a) { return lerp(x, y, a); }
+ AD3 ALerpD3(AD3 x, AD3 y, AD3 a) { return lerp(x, y, a); }
+ AD4 ALerpD4(AD4 x, AD4 y, AD4 a) { return lerp(x, y, a); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal, forwarded to rcp().
+ AD1 ARcpD1(AD1 x) { return rcp(x); }
+ AD2 ARcpD2(AD2 x) { return rcp(x); }
+ AD3 ARcpD3(AD3 x) { return rcp(x); }
+ AD4 ARcpD4(AD4 x) { return rcp(x); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal square root, forwarded to rsqrt().
+ AD1 ARsqD1(AD1 x) { return rsqrt(x); }
+ AD2 ARsqD2(AD2 x) { return rsqrt(x); }
+ AD3 ARsqD3(AD3 x) { return rsqrt(x); }
+ AD4 ARsqD4(AD4 x) { return rsqrt(x); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Saturate, forwarded to saturate().
+ AD1 ASatD1(AD1 x) { return saturate(x); }
+ AD2 ASatD2(AD2 x) { return saturate(x); }
+ AD3 ASatD3(AD3 x) { return saturate(x); }
+ AD4 ASatD4(AD4 x) { return saturate(x); }
+ #endif
+//==============================================================================================================================
+// HLSL WAVE
+//==============================================================================================================================
+ #ifdef A_WAVE
+ // Xor-shuffle across the wave: each lane reads 'v' from the lane whose index is its own index xor 'x'.
+ // Where 'x' must be a compile time literal.
+ AF1 AWaveXorF1(AF1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ AF2 AWaveXorF2(AF2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ AF3 AWaveXorF3(AF3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ AF4 AWaveXorF4(AF4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ // The vector variants carry the component count in their name, matching the GLSL definitions.
+ AU2 AWaveXorU2(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ AU3 AWaveXorU3(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ AU4 AWaveXorU4(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);}
+ // Backward compatibility: the vector variants used to be declared as overloads of AWaveXorU1.
+ AU2 AWaveXorU1(AU2 v,AU1 x){return AWaveXorU2(v,x);}
+ AU3 AWaveXorU1(AU3 v,AU1 x){return AWaveXorU3(v,x);}
+ AU4 AWaveXorU1(AU4 v,AU1 x){return AWaveXorU4(v,x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_HALF
+ // 16-bit variants: pack the pairs into 32-bit words, read the other lane's words, then unpack again.
+ AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(WaveReadLaneAt(AU1_AH2(v),WaveGetLaneIndex()^x));}
+ AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(WaveReadLaneAt(AU2_AH4(v),WaveGetLaneIndex()^x));}
+ AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(WaveReadLaneAt(AU1_AW2(v),WaveGetLaneIndex()^x));}
+ // Four words occupy two 32-bit words, so this goes through the AU2 pack/unpack helpers.
+ AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(WaveReadLaneAt(AU2_AW4(v),WaveGetLaneIndex()^x));}
+ #endif
+ #endif
+//==============================================================================================================================
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+// GPU COMMON
+//
+//
+//==============================================================================================================================
+#ifdef A_GPU
+ // Positive (A_INFP_F) and negative (A_INFN_F) 32-bit float infinity, built from their bit patterns via AF1_AU1().
+ #define A_INFP_F AF1_AU1(0x7f800000u)
+ #define A_INFN_F AF1_AU1(0xff800000u)
+//------------------------------------------------------------------------------------------------------------------------------
+ // ORs the sign bit of 's' (0x80000000) into the bits of 'd'.
+ // This only copies the sign when 'd' is positive, since an already set sign bit in 'd' is never cleared.
+ AF1 ACpySgnF1(AF1 d, AF1 s) { return AF1_AU1(AU1_AF1(d) | (AU1_AF1(s) & AU1_(0x80000000u))); }
+ AF2 ACpySgnF2(AF2 d, AF2 s) { return AF2_AU2(AU2_AF2(d) | (AU2_AF2(s) & AU2_(0x80000000u))); }
+ AF3 ACpySgnF3(AF3 d, AF3 s) { return AF3_AU3(AU3_AF3(d) | (AU3_AF3(s) & AU3_(0x80000000u))); }
+ AF4 ACpySgnF4(AF4 d, AF4 s) { return AF4_AU4(AU4_AF4(d) | (AU4_AF4(s) & AU4_(0x80000000u))); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Branch-free "is negative" mask (handy as a lerp selector), computed as saturate(m * -INF):
+ //  m is NaN : 0
+ //  m >= 0   : 0
+ //  m <  0   : 1
+ // This relies on the following floating point behavior,
+ //  saturate(+a*(-INF)==-INF) := 0
+ //  saturate( 0*(-INF)== NaN) := 0
+ //  saturate(-a*(-INF)==+INF) := 1
+ AF1 ASignedF1(AF1 m) { return ASatF1(m * AF1_(A_INFN_F)); }
+ AF2 ASignedF2(AF2 m) { return ASatF2(m * AF2_(A_INFN_F)); }
+ AF3 ASignedF3(AF3 m) { return ASatF3(m * AF3_(A_INFN_F)); }
+ AF4 ASignedF4(AF4 m) { return ASatF4(m * AF4_(A_INFN_F)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Counterpart of ASigned*: same construction with +INF, i.e. saturate(m * +INF).
+ AF1 AGtZeroF1(AF1 m) { return ASatF1(m * AF1_(A_INFP_F)); }
+ AF2 AGtZeroF2(AF2 m) { return ASatF2(m * AF2_(A_INFP_F)); }
+ AF3 AGtZeroF3(AF3 m) { return ASatF3(m * AF3_(A_INFP_F)); }
+ AF4 AGtZeroF4(AF4 m) { return ASatF4(m * AF4_(A_INFP_F)); }
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Positive (A_INFP_H) and negative (A_INFN_H) half float infinity, built from 16-bit patterns via AH1_AW1().
+ // With A_HLSL_6_2 the literal is explicitly cast to uint16_t first.
+ #ifdef A_HLSL_6_2
+ #define A_INFP_H AH1_AW1((uint16_t)0x7c00u)
+ #define A_INFN_H AH1_AW1((uint16_t)0xfc00u)
+ #else
+ #define A_INFP_H AH1_AW1(0x7c00u)
+ #define A_INFN_H AH1_AW1(0xfc00u)
+ #endif
+
+//------------------------------------------------------------------------------------------------------------------------------
+ // ORs the sign bit of 's' (0x8000) into the bits of 'd' (half float version of ACpySgnF*).
+ AH1 ACpySgnH1(AH1 d, AH1 s) { return AH1_AW1(AW1_AH1(d) | (AW1_AH1(s) & AW1_(0x8000u))); }
+ AH2 ACpySgnH2(AH2 d, AH2 s) { return AH2_AW2(AW2_AH2(d) | (AW2_AH2(s) & AW2_(0x8000u))); }
+ AH3 ACpySgnH3(AH3 d, AH3 s) { return AH3_AW3(AW3_AH3(d) | (AW3_AH3(s) & AW3_(0x8000u))); }
+ AH4 ACpySgnH4(AH4 d, AH4 s) { return AH4_AW4(AW4_AH4(d) | (AW4_AH4(s) & AW4_(0x8000u))); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half float "is negative" mask: saturate(m * -INF).
+ AH1 ASignedH1(AH1 m) { return ASatH1(m * AH1_(A_INFN_H)); }
+ AH2 ASignedH2(AH2 m) { return ASatH2(m * AH2_(A_INFN_H)); }
+ AH3 ASignedH3(AH3 m) { return ASatH3(m * AH3_(A_INFN_H)); }
+ AH4 ASignedH4(AH4 m) { return ASatH4(m * AH4_(A_INFN_H)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half float counterpart with +INF: saturate(m * +INF).
+ AH1 AGtZeroH1(AH1 m) { return ASatH1(m * AH1_(A_INFP_H)); }
+ AH2 AGtZeroH2(AH2 m) { return ASatH2(m * AH2_(A_INFP_H)); }
+ AH3 AGtZeroH3(AH3 m) { return ASatH3(m * AH3_(A_INFP_H)); }
+ AH4 AGtZeroH4(AH4 m) { return ASatH4(m * AH4_(A_INFP_H)); }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// [FIS] FLOAT INTEGER SORTABLE
+//------------------------------------------------------------------------------------------------------------------------------
+// Float to integer sortable.
+// - If sign bit=0, flip the sign bit (positives).
+// - If sign bit=1, flip all bits (negatives).
+// Integer sortable to float.
+// - If sign bit=1, flip the sign bit (positives).
+// - If sign bit=0, flip all bits (negatives).
+// Has nice side effects.
+// - Larger integers are more positive values.
+// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+// Burns 3 ops for conversion {shift,or,xor}.
+//==============================================================================================================================
+ // Float bits -> sortable integer: xor with (sign fill | sign bit), so negatives flip every bit and
+ // positives flip only the sign bit.
+ AU1 AFisToU1(AU1 x)
+ {
+  AU1 signFill = AShrSU1(x, AU1_(31));
+  return x ^ (signFill | AU1_(0x80000000));
+ }
+ // Sortable integer -> float bits: same xor, but with the sign fill inverted.
+ AU1 AFisFromU1(AU1 x)
+ {
+  AU1 invFill = ~AShrSU1(x, AU1_(31));
+  return x ^ (invFill | AU1_(0x80000000));
+ }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Variants meant to adjust only the high 16-bit value (for when the upper half of the word holds a 16-bit float).
+ // NOTE(review): the signed shift by 15 also places bits 30..15 of 'x' into the low half of the xor mask -
+ // confirm that callers do not rely on the low 16 bits staying intact.
+ AU1 AFisToHiU1(AU1 x)
+ {
+  AU1 fill = AShrSU1(x, AU1_(15));
+  return x ^ (fill | AU1_(0x80000000));
+ }
+ AU1 AFisFromHiU1(AU1 x)
+ {
+  AU1 invFill = ~AShrSU1(x, AU1_(15));
+  return x ^ (invFill | AU1_(0x80000000));
+ }
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_HALF
+ // 16-bit sortable conversions: xor with (sign fill | 0x8000); the From variants invert the sign fill.
+ AW1 AFisToW1(AW1 x) { return x ^ ((AShrSW1(x, AW1_(15))) | AW1_(0x8000)); }
+ AW1 AFisFromW1(AW1 x) { return x ^ ((~AShrSW1(x, AW1_(15))) | AW1_(0x8000)); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Two component versions of the above.
+ AW2 AFisToW2(AW2 x) { return x ^ ((AShrSW2(x, AW2_(15))) | AW2_(0x8000)); }
+ AW2 AFisFromW2(AW2 x) { return x ^ ((~AShrSW2(x, AW2_(15))) | AW2_(0x8000)); }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// [PERM] V_PERM_B32
+//------------------------------------------------------------------------------------------------------------------------------
+// Support for V_PERM_B32 started in the 3rd generation of GCN.
+//------------------------------------------------------------------------------------------------------------------------------
+// yyyyxxxx - The 'i' input.
+// 76543210
+// ========
+// HGFEDCBA - Naming on permutation.
+//------------------------------------------------------------------------------------------------------------------------------
+// TODO
+// ====
+// - Make sure compiler optimizes this.
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Byte permutes of the 8 input bytes: A..D are bytes 0..3 of i.x, E..H are bytes 0..3 of i.y.
+ // The function name lists the result bytes from most to least significant; '0' is a zero byte.
+ AU1 APerm0E0A(AU2 i) { return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); }
+ AU1 APerm0F0B(AU2 i) { return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); }
+ AU1 APerm0G0C(AU2 i) { return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); }
+ AU1 APerm0H0D(AU2 i) { return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Replace a single byte of i.y with one byte taken from i.x.
+ AU1 APermHGFA(AU2 i) { return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); }
+ AU1 APermHGFC(AU2 i) { return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); }
+ AU1 APermHGAE(AU2 i) { return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); }
+ AU1 APermHGCE(AU2 i) { return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); }
+ AU1 APermHAFE(AU2 i) { return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); }
+ AU1 APermHCFE(AU2 i) { return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); }
+ AU1 APermAGFE(AU2 i) { return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); }
+ AU1 APermCGFE(AU2 i) { return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Combine the even bytes (A,C from i.x and E,G from i.y) into one word, in two different orders.
+ AU1 APermGCEA(AU2 i) { return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); }
+ AU1 APermGECA(AU2 i)
+ {
+  return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u));
+ }
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// [BUC] BYTE UNSIGNED CONVERSION
+//------------------------------------------------------------------------------------------------------------------------------
+// Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation.
+// Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively.
+//------------------------------------------------------------------------------------------------------------------------------
+// OPCODE NOTES
+// ============
+// GCN does not do UNORM or SNORM for bytes in opcodes.
+// - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float.
+// - V_CVT_PKACCUM_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer).
+// V_PERM_B32 does byte packing with ability to zero fill bytes as well.
+// - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo.
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops.
+// ==== =====
+// 0 : 0
+// 1 : 1
+// ...
+// 255 : 255
+// : 256 (just outside the encoding range)
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32.
+// ==== =====
+// 0 : 0
+// 1 : 1/512
+// 2 : 1/256
+// ...
+// 64 : 1/8
+// 128 : 1/4
+// 255 : 255/512
+// : 1/2 (just outside the encoding range)
+//------------------------------------------------------------------------------------------------------------------------------
+// OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES
+// ============================================
+// r=ABuc0FromU1(i)
+// V_CVT_F32_UBYTE0 r,i
+// --------------------------------------------
+// r=ABuc0ToU1(d,i)
+// V_CVT_PKACCUM_U8_F32 r,i,0,d
+// --------------------------------------------
+// d=ABuc0FromU2(i)
+// Where 'k0' is an SGPR with 0x0E0A
+// Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits
+// V_PERM_B32 d,i.x,i.y,k0
+// V_PK_FMA_F16 d,d,k1.x,0
+// --------------------------------------------
+// r=ABuc0ToU2(d,i)
+// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+// Where 'k1' is an SGPR with 0x????
+// Where 'k2' is an SGPR with 0x????
+// V_PK_FMA_F16 i,i,k0.x,0
+// V_PERM_B32 r.x,i,i,k1
+// V_PERM_B32 r.y,i,i,k2
+//==============================================================================================================================
+ // Peak range for 32-bit and 16-bit operations: the largest value the [BUC] byte encoding can represent on each path.
+ #define A_BUC_32 (255.0) // Byte 255 decodes to 255.0 through ABuc*FromU1().
+ #define A_BUC_16 (255.0/512.0) // Byte 255 decodes to 255/512 through ABuc*FromU2().
+//==============================================================================================================================
+ #if 1
+ // Each writer below is intended to become one V_CVT_PKACCUM_U8_F32: convert 'i', clamp to 255, insert into one byte of 'd'.
+ // Keep the min(): the extra min is required to pattern match to V_CVT_PKACCUM_U8_F32.
+ AU1 ABuc0ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xffffff00u)|(b&0x000000ffu);}
+ AU1 ABuc1ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xffff00ffu)|((b<< 8)&0x0000ff00u);}
+ AU1 ABuc2ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0xff00ffffu)|((b<<16)&0x00ff0000u);}
+ AU1 ABuc3ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i),255u);return (d&0x00ffffffu)|((b<<24)&0xff000000u);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Each reader below is intended to become one V_CVT_F32_UBYTE*: isolate one byte of 'i' and convert it to float.
+ AF1 ABuc0FromU1(AU1 i){AU1 b=i&255u;return AF1(b);}
+ AF1 ABuc1FromU1(AU1 i){AU1 b=(i>> 8)&255u;return AF1(b);}
+ AF1 ABuc2FromU1(AU1 i){AU1 b=(i>>16)&255u;return AF1(b);}
+ AF1 ABuc3FromU1(AU1 i){AU1 b=(i>>24)&255u;return AF1(b);}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Scales both inputs by 1/32768, then permutes their bits: {x0,x1} and {y0,y1} become {{x0,y0},{x1,y1}}.
+ AW2 ABuc01ToW2(AH2 x,AH2 y){AH2 k=AH2_(1.0/32768.0);AU1 bx=AU1_AW2(AW2_AH2(x*k));AU1 by=AU1_AW2(AW2_AH2(y*k));
+ return AW2_AU1(APermGCEA(AU2(bx,by)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // SOA to AOS plus conversion, designed for 3 ops: scale 'i' by 1/32768, then one byte permute per component of 'd'.
+ AU2 ABuc0ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermHGFA(AU2(d.x,p));AU1 ry=APermHGFC(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABuc1ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermHGAE(AU2(d.x,p));AU1 ry=APermHGCE(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABuc2ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermHAFE(AU2(d.x,p));AU1 ry=APermHCFE(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABuc3ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermAGFE(AU2(d.x,p));AU1 ry=APermCGFE(AU2(d.y,p));return AU2(rx,ry);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // AOS to SOA plus conversion, designed for 2 ops: one byte permute of 'i', then a scale by 32768.
+ AH2 ABuc0FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0E0A(i)));return h*AH2_(32768.0);}
+ AH2 ABuc1FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0F0B(i)));return h*AH2_(32768.0);}
+ AH2 ABuc2FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0G0C(i)));return h*AH2_(32768.0);}
+ AH2 ABuc3FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0H0D(i)));return h*AH2_(32768.0);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// [BSC] BYTE SIGNED CONVERSION
+//------------------------------------------------------------------------------------------------------------------------------
+// Similar to [BUC].
+// Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively.
+//------------------------------------------------------------------------------------------------------------------------------
+// ENCODING (without zero-based encoding)
+// ========
+// 0 = unused (can be used to mean something else)
+// 1 = lowest value
+// 128 = exact zero center (stored as 0 with zero-based encoding)
+// 255 = highest value
+//------------------------------------------------------------------------------------------------------------------------------
+// Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+// This is useful if there is a desire for cleared values to decode as zero.
+//------------------------------------------------------------------------------------------------------------------------------
+// BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32.
+// ==== =====
+// 0 : -128/512 (unused)
+// 1 : -127/512
+// 2 : -126/512
+// ...
+// 128 : 0
+// ...
+// 255 : 127/512
+// : 1/4 (just outside the encoding range)
+//==============================================================================================================================
+ // Peak range for 32-bit and 16-bit operations: the largest magnitude the [BSC] byte encoding can represent on each path.
+ #define A_BSC_32 (127.0) // Byte 255 decodes to 127.0 through ABsc*FromU1().
+ #define A_BSC_16 (127.0/512.0) // Byte 255 decodes to 127/512 through ABsc*FromU2().
+//==============================================================================================================================
+ #if 1
+ AU1 ABsc0ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xffffff00u)|(b&0x000000ffu);} // Bias by 128, clamp, insert.
+ AU1 ABsc1ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xffff00ffu)|((b<< 8)&0x0000ff00u);}
+ AU1 ABsc2ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0xff00ffffu)|((b<<16)&0x00ff0000u);}
+ AU1 ABsc3ToU1(AU1 d,AF1 i){AU1 b=min(AU1(i+128.0),255u);return (d&0x00ffffffu)|((b<<24)&0xff000000u);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 ABsc0ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);return ((d&0xffffff00u)|(b&0x000000ffu))^0x00000080u;} // XOR flips the byte's MSB.
+ AU1 ABsc1ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);return ((d&0xffff00ffu)|((b<< 8)&0x0000ff00u))^0x00008000u;}
+ AU1 ABsc2ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);return ((d&0xff00ffffu)|((b<<16)&0x00ff0000u))^0x00800000u;}
+ AU1 ABsc3ToZbU1(AU1 d,AF1 i){AU1 b=min(AU1(trunc(i)+128.0),255u);return ((d&0x00ffffffu)|((b<<24)&0xff000000u))^0x80000000u;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ABsc0FromU1(AU1 i){AF1 f=AF1(i&255u);return f-128.0;} // Extract the byte, then remove the 128 bias.
+ AF1 ABsc1FromU1(AU1 i){AF1 f=AF1((i>> 8)&255u);return f-128.0;}
+ AF1 ABsc2FromU1(AU1 i){AF1 f=AF1((i>>16)&255u);return f-128.0;}
+ AF1 ABsc3FromU1(AU1 i){AF1 f=AF1((i>>24)&255u);return f-128.0;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ABsc0FromZbU1(AU1 i){AU1 b=(i&255u)^0x80u;return AF1(b)-128.0;} // Flip the byte's MSB back, then remove the 128 bias.
+ AF1 ABsc1FromZbU1(AU1 i){AU1 b=((i>> 8)&255u)^0x80u;return AF1(b)-128.0;}
+ AF1 ABsc2FromZbU1(AU1 i){AU1 b=((i>>16)&255u)^0x80u;return AF1(b)-128.0;}
+ AF1 ABsc3FromZbU1(AU1 i){AU1 b=((i>>24)&255u)^0x80u;return AF1(b)-128.0;}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Scales and biases both inputs, then permutes their bits: {x0,x1} and {y0,y1} become {{x0,y0},{x1,y1}}.
+ AW2 ABsc01ToW2(AH2 x,AH2 y){AH2 k=AH2_(1.0/32768.0);AH2 o=AH2_(0.25/32768.0);AU1 bx=AU1_AW2(AW2_AH2(x*k+o));
+ AU1 by=AU1_AW2(AW2_AH2(y*k+o));return AW2_AU1(APermGCEA(AU2(bx,by)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU2 ABsc0ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermHGFA(AU2(d.x,p));AU1 ry=APermHGFC(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABsc1ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermHGAE(AU2(d.x,p));AU1 ry=APermHGCE(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABsc2ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermHAFE(AU2(d.x,p));AU1 ry=APermHCFE(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABsc3ToU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s));
+ AU1 rx=APermAGFE(AU2(d.x,p));AU1 ry=APermCGFE(AU2(d.y,p));return AU2(rx,ry);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU2 ABsc0ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u;
+ AU1 rx=APermHGFA(AU2(d.x,p));AU1 ry=APermHGFC(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABsc1ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u;
+ AU1 rx=APermHGAE(AU2(d.x,p));AU1 ry=APermHGCE(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABsc2ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u;
+ AU1 rx=APermHAFE(AU2(d.x,p));AU1 ry=APermHCFE(AU2(d.y,p));return AU2(rx,ry);}
+ AU2 ABsc3ToZbU2(AU2 d,AH2 i){AH2 s=i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);AU1 p=AU1_AW2(AW2_AH2(s))^0x00800080u;
+ AU1 rx=APermAGFE(AU2(d.x,p));AU1 ry=APermCGFE(AU2(d.y,p));return AU2(rx,ry);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AH2 ABsc0FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0E0A(i)));return h*AH2_(32768.0)-AH2_(0.25);} // Permute, scale, remove 1/4 bias.
+ AH2 ABsc1FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0F0B(i)));return h*AH2_(32768.0)-AH2_(0.25);}
+ AH2 ABsc2FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0G0C(i)));return h*AH2_(32768.0)-AH2_(0.25);}
+ AH2 ABsc3FromU2(AU2 i){AH2 h=AH2_AW2(AW2_AU1(APerm0H0D(i)));return h*AH2_(32768.0)-AH2_(0.25);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AH2 ABsc0FromZbU2(AU2 i){AU1 p=APerm0E0A(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);} // XOR undoes the MSB flip.
+ AH2 ABsc1FromZbU2(AU2 i){AU1 p=APerm0F0B(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);}
+ AH2 ABsc2FromZbU2(AU2 i){AU1 p=APerm0G0C(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);}
+ AH2 ABsc3FromZbU2(AU2 i){AU1 p=APerm0H0D(i)^0x00800080u;return AH2_AW2(AW2_AU1(p))*AH2_(32768.0)-AH2_(0.25);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// HALF APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// These support only positive inputs.
+// Did not see value yet in specialization for range.
+// Using quick testing, ended up mostly getting the same "best" approximation for various ranges.
+// With hardware that can co-execute transcendentals, the value in approximations could be less than expected.
+// However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total.
+// And co-execution would require a compiler interleaving a lot of independent work for packed usage.
+//------------------------------------------------------------------------------------------------------------------------------
+// The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total).
+// Same with sqrt(), as this could be x*rsq() (7 ops).
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Square root estimate in 2 ops (shift, add) on the raw half bits; minimizes squared error across the full positive range.
+ // The 0x1de2 based approximation maps {0 to 1} input to < 1 output.
+ AH1 APrxLoSqrtH1(AH1 a){AW1 h=AW1_AH1(a)>>AW1_(1);return AH1_AW1(h+AW1_(0x1de2));}
+ AH2 APrxLoSqrtH2(AH2 a){AW2 h=AW2_AH2(a)>>AW2_(1);return AH2_AW2(h+AW2_(0x1de2));}
+ AH3 APrxLoSqrtH3(AH3 a){AW3 h=AW3_AH3(a)>>AW3_(1);return AH3_AW3(h+AW3_(0x1de2));}
+ AH4 APrxLoSqrtH4(AH4 a){AW4 h=AW4_AH4(a)>>AW4_(1);return AH4_AW4(h+AW4_(0x1de2));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Lower precision reciprocal estimate, 1 op: subtract the raw half bits from a magic constant.
+ // The constant minimizes squared error across {smallest normal to 16384.0}.
+ AH1 APrxLoRcpH1(AH1 a){AW1 b=AW1_AH1(a);return AH1_AW1(AW1_(0x7784)-b);}
+ AH2 APrxLoRcpH2(AH2 a){AW2 b=AW2_AH2(a);return AH2_AW2(AW2_(0x7784)-b);}
+ AH3 APrxLoRcpH3(AH3 a){AW3 b=AW3_AH3(a);return AH3_AW3(AW3_(0x7784)-b);}
+ AH4 APrxLoRcpH4(AH4 a){AW4 b=AW4_AH4(a);return AH4_AW4(AW4_(0x7784)-b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Medium precision reciprocal, 3 ops: bit-trick estimate 'e' refined by one Newton Raphson iteration e*(2-e*a).
+ AH1 APrxMedRcpH1(AH1 a){AW1 k=AW1_(0x778d);AH1 e=AH1_AW1(k-AW1_AH1(a));return e*(-e*a+AH1_(2.0));}
+ AH2 APrxMedRcpH2(AH2 a){AW2 k=AW2_(0x778d);AH2 e=AH2_AW2(k-AW2_AH2(a));return e*(-e*a+AH2_(2.0));}
+ AH3 APrxMedRcpH3(AH3 a){AW3 k=AW3_(0x778d);AH3 e=AH3_AW3(k-AW3_AH3(a));return e*(-e*a+AH3_(2.0));}
+ AH4 APrxMedRcpH4(AH4 a){AW4 k=AW4_(0x778d);AH4 e=AH4_AW4(k-AW4_AH4(a));return e*(-e*a+AH4_(2.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Reciprocal square root estimate, 2 ops (shift, subtract); minimizes squared error across {smallest normal to 16384.0}.
+ AH1 APrxLoRsqH1(AH1 a){AW1 h=AW1_AH1(a)>>AW1_(1);return AH1_AW1(AW1_(0x59a3)-h);}
+ AH2 APrxLoRsqH2(AH2 a){AW2 h=AW2_AH2(a)>>AW2_(1);return AH2_AW2(AW2_(0x59a3)-h);}
+ AH3 APrxLoRsqH3(AH3 a){AW3 h=AW3_AH3(a)>>AW3_(1);return AH3_AW3(AW3_(0x59a3)-h);}
+ AH4 APrxLoRsqH4(AH4 a){AW4 h=AW4_AH4(a)>>AW4_(1);return AH4_AW4(AW4_(0x59a3)-h);}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// FLOAT APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN",
+// - Idea dates back to SGI, then to Quake 3, etc.
+// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+// - sqrt(x)=rsqrt(x)*x
+// - rcp(x)=rsqrt(x)*rsqrt(x) for positive x
+// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+//------------------------------------------------------------------------------------------------------------------------------
+// The constants below come from a perhaps less exhaustive search for the optimum.
+// Used FP16 normal range for testing with +4096 32-bit step size for sampling error.
+// So these match up well with the half approximations.
+//==============================================================================================================================
+ AF1 APrxLoSqrtF1(AF1 a){AU1 h=AU1_AF1(a)>>AU1_(1);return AF1_AU1(h+AU1_(0x1fbc4639));} // Bit-trick sqrt(a) estimate.
+ AF1 APrxLoRcpF1(AF1 a){AU1 b=AU1_AF1(a);return AF1_AU1(AU1_(0x7ef07ebb)-b);} // Bit-trick 1/a estimate.
+ AF1 APrxMedRcpF1(AF1 a){AU1 k=AU1_(0x7ef19fff);AF1 e=AF1_AU1(k-AU1_AF1(a));return e*(-e*a+AF1_(2.0));} // Estimate + one Newton step.
+ AF1 APrxLoRsqF1(AF1 a){AU1 h=AU1_AF1(a)>>AU1_(1);return AF1_AU1(AU1_(0x5f347d74)-h);} // Bit-trick 1/sqrt(a) estimate.
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 APrxLoSqrtF2(AF2 a){AU2 h=AU2_AF2(a)>>AU2_(1);return AF2_AU2(h+AU2_(0x1fbc4639));} // Bit-trick sqrt(a) estimate.
+ AF2 APrxLoRcpF2(AF2 a){AU2 b=AU2_AF2(a);return AF2_AU2(AU2_(0x7ef07ebb)-b);} // Bit-trick 1/a estimate.
+ AF2 APrxMedRcpF2(AF2 a){AU2 k=AU2_(0x7ef19fff);AF2 e=AF2_AU2(k-AU2_AF2(a));return e*(-e*a+AF2_(2.0));} // Estimate + one Newton step.
+ AF2 APrxLoRsqF2(AF2 a){AU2 h=AU2_AF2(a)>>AU2_(1);return AF2_AU2(AU2_(0x5f347d74)-h);} // Bit-trick 1/sqrt(a) estimate.
+//------------------------------------------------------------------------------------------------------------------------------
+ AF3 APrxLoSqrtF3(AF3 a){AU3 h=AU3_AF3(a)>>AU3_(1);return AF3_AU3(h+AU3_(0x1fbc4639));} // Bit-trick sqrt(a) estimate.
+ AF3 APrxLoRcpF3(AF3 a){AU3 b=AU3_AF3(a);return AF3_AU3(AU3_(0x7ef07ebb)-b);} // Bit-trick 1/a estimate.
+ AF3 APrxMedRcpF3(AF3 a){AU3 k=AU3_(0x7ef19fff);AF3 e=AF3_AU3(k-AU3_AF3(a));return e*(-e*a+AF3_(2.0));} // Estimate + one Newton step.
+ AF3 APrxLoRsqF3(AF3 a){AU3 h=AU3_AF3(a)>>AU3_(1);return AF3_AU3(AU3_(0x5f347d74)-h);} // Bit-trick 1/sqrt(a) estimate.
+//------------------------------------------------------------------------------------------------------------------------------
+ AF4 APrxLoSqrtF4(AF4 a){AU4 h=AU4_AF4(a)>>AU4_(1);return AF4_AU4(h+AU4_(0x1fbc4639));} // Bit-trick sqrt(a) estimate.
+ AF4 APrxLoRcpF4(AF4 a){AU4 b=AU4_AF4(a);return AF4_AU4(AU4_(0x7ef07ebb)-b);} // Bit-trick 1/a estimate.
+ AF4 APrxMedRcpF4(AF4 a){AU4 k=AU4_(0x7ef19fff);AF4 e=AF4_AU4(k-AU4_AF4(a));return e*(-e*a+AF4_(2.0));} // Estimate + one Newton step.
+ AF4 APrxLoRsqF4(AF4 a){AU4 h=AU4_AF4(a)>>AU4_(1);return AF4_AU4(AU4_(0x5f347d74)-h);} // Bit-trick 1/sqrt(a) estimate.
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// PQ APPROXIMATIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// PQ is very close to x^(1/8). The functions below use the fast float approximation method to do
+// PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%.
+//==============================================================================================================================
+// Helpers: Quart(a) is a^4 (two squarings), Oct(a) is a^8 (three squarings).
+ AF1 Quart(AF1 a) { AF1 a2 = a * a; return a2 * a2; }
+ AF1 Oct(AF1 a) { AF1 a2 = a * a; AF1 a4 = a2 * a2; return a4 * a4; }
+ AF2 Quart(AF2 a) { AF2 a2 = a * a; return a2 * a2; }
+ AF2 Oct(AF2 a) { AF2 a2 = a * a; AF2 a4 = a2 * a2; return a4 * a4; }
+ AF3 Quart(AF3 a) { AF3 a2 = a * a; return a2 * a2; }
+ AF3 Oct(AF3 a) { AF3 a2 = a * a; AF3 a4 = a2 * a2; return a4 * a4; }
+ AF4 Quart(AF4 a) { AF4 a2 = a * a; return a2 * a2; }
+ AF4 Oct(AF4 a) { AF4 a2 = a * a; AF4 a4 = a2 * a2; return a4 * a4; }
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF1 APrxPQToGamma2(AF1 a) { AF1 a2 = a * a; return a2 * a2; } // a^4
+ AF1 APrxPQToLinear(AF1 a) { AF1 a2 = a * a; AF1 a4 = a2 * a2; return a4 * a4; } // a^8
+ AF1 APrxLoGamma2ToPQ(AF1 a) { AU1 q = AU1_AF1(a) >> AU1_(2); return AF1_AU1(q + AU1_(0x2F9A4E46)); } // Bit-trick 4th root estimate.
+ AF1 APrxMedGamma2ToPQ(AF1 a) { AF1 e = APrxLoGamma2ToPQ(a); AF1 e4 = Quart(e); return e - e * (e4 - a) / (AF1_(4.0) * e4); } // Estimate + one Newton step.
+ AF1 APrxHighGamma2ToPQ(AF1 a) { AF1 r = sqrt(a); return sqrt(r); } // 4th root via two sqrt().
+ AF1 APrxLoLinearToPQ(AF1 a) { AU1 q = AU1_AF1(a) >> AU1_(3); return AF1_AU1(q + AU1_(0x378D8723)); } // Bit-trick 8th root estimate.
+ AF1 APrxMedLinearToPQ(AF1 a) { AF1 e = APrxLoLinearToPQ(a); AF1 e8 = Oct(e); return e - e * (e8 - a) / (AF1_(8.0) * e8); } // Estimate + one Newton step.
+ AF1 APrxHighLinearToPQ(AF1 a) { AF1 r = sqrt(a); r = sqrt(r); return sqrt(r); } // 8th root via three sqrt().
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF2 APrxPQToGamma2(AF2 a) { AF2 a2 = a * a; return a2 * a2; } // a^4
+ AF2 APrxPQToLinear(AF2 a) { AF2 a2 = a * a; AF2 a4 = a2 * a2; return a4 * a4; } // a^8
+ AF2 APrxLoGamma2ToPQ(AF2 a) { AU2 q = AU2_AF2(a) >> AU2_(2); return AF2_AU2(q + AU2_(0x2F9A4E46)); } // Bit-trick 4th root estimate.
+ AF2 APrxMedGamma2ToPQ(AF2 a) { AF2 e = APrxLoGamma2ToPQ(a); AF2 e4 = Quart(e); return e - e * (e4 - a) / (AF1_(4.0) * e4); } // Estimate + one Newton step.
+ AF2 APrxHighGamma2ToPQ(AF2 a) { AF2 r = sqrt(a); return sqrt(r); } // 4th root via two sqrt().
+ AF2 APrxLoLinearToPQ(AF2 a) { AU2 q = AU2_AF2(a) >> AU2_(3); return AF2_AU2(q + AU2_(0x378D8723)); } // Bit-trick 8th root estimate.
+ AF2 APrxMedLinearToPQ(AF2 a) { AF2 e = APrxLoLinearToPQ(a); AF2 e8 = Oct(e); return e - e * (e8 - a) / (AF1_(8.0) * e8); } // Estimate + one Newton step.
+ AF2 APrxHighLinearToPQ(AF2 a) { AF2 r = sqrt(a); r = sqrt(r); return sqrt(r); } // 8th root via three sqrt().
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF3 APrxPQToGamma2(AF3 a) { AF3 a2 = a * a; return a2 * a2; } // a^4
+ AF3 APrxPQToLinear(AF3 a) { AF3 a2 = a * a; AF3 a4 = a2 * a2; return a4 * a4; } // a^8
+ AF3 APrxLoGamma2ToPQ(AF3 a) { AU3 q = AU3_AF3(a) >> AU3_(2); return AF3_AU3(q + AU3_(0x2F9A4E46)); } // Bit-trick 4th root estimate.
+ AF3 APrxMedGamma2ToPQ(AF3 a) { AF3 e = APrxLoGamma2ToPQ(a); AF3 e4 = Quart(e); return e - e * (e4 - a) / (AF1_(4.0) * e4); } // Estimate + one Newton step.
+ AF3 APrxHighGamma2ToPQ(AF3 a) { AF3 r = sqrt(a); return sqrt(r); } // 4th root via two sqrt().
+ AF3 APrxLoLinearToPQ(AF3 a) { AU3 q = AU3_AF3(a) >> AU3_(3); return AF3_AU3(q + AU3_(0x378D8723)); } // Bit-trick 8th root estimate.
+ AF3 APrxMedLinearToPQ(AF3 a) { AF3 e = APrxLoLinearToPQ(a); AF3 e8 = Oct(e); return e - e * (e8 - a) / (AF1_(8.0) * e8); } // Estimate + one Newton step.
+ AF3 APrxHighLinearToPQ(AF3 a) { AF3 r = sqrt(a); r = sqrt(r); return sqrt(r); } // 8th root via three sqrt().
+ //------------------------------------------------------------------------------------------------------------------------------
+ AF4 APrxPQToGamma2(AF4 a) { AF4 a2 = a * a; return a2 * a2; } // a^4
+ AF4 APrxPQToLinear(AF4 a) { AF4 a2 = a * a; AF4 a4 = a2 * a2; return a4 * a4; } // a^8
+ AF4 APrxLoGamma2ToPQ(AF4 a) { AU4 q = AU4_AF4(a) >> AU4_(2); return AF4_AU4(q + AU4_(0x2F9A4E46)); } // Bit-trick 4th root estimate.
+ AF4 APrxMedGamma2ToPQ(AF4 a) { AF4 e = APrxLoGamma2ToPQ(a); AF4 e4 = Quart(e); return e - e * (e4 - a) / (AF1_(4.0) * e4); } // Estimate + one Newton step.
+ AF4 APrxHighGamma2ToPQ(AF4 a) { AF4 r = sqrt(a); return sqrt(r); } // 4th root via two sqrt().
+ AF4 APrxLoLinearToPQ(AF4 a) { AU4 q = AU4_AF4(a) >> AU4_(3); return AF4_AU4(q + AU4_(0x378D8723)); } // Bit-trick 8th root estimate.
+ AF4 APrxMedLinearToPQ(AF4 a) { AF4 e = APrxLoLinearToPQ(a); AF4 e8 = Oct(e); return e - e * (e8 - a) / (AF1_(8.0) * e8); } // Estimate + one Newton step.
+ AF4 APrxHighLinearToPQ(AF4 a) { AF4 r = sqrt(a); r = sqrt(r); return sqrt(r); } // 8th root via three sqrt().
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// PARABOLIC SIN & COS
+//------------------------------------------------------------------------------------------------------------------------------
+// Approximate answers to transcendental questions.
+//------------------------------------------------------------------------------------------------------------------------------
+//==============================================================================================================================
+ #if 1
+ // Valid input range is {-1 to 1}, which stands for {0 to 2 pi}.
+ // Output range is {-1/4 to 1/4}, which stands for {-1 to 1}.
+ AF1 APSinF1(AF1 x){AF1 m=abs(x);return x*m-x;} // MAD.
+ AF2 APSinF2(AF2 x){AF2 m=abs(x);return x*m-x;}
+ AF1 APCosF1(AF1 x){AF1 t=AFractF1(x*AF1_(0.5)+AF1_(0.75));return APSinF1(t*AF1_(2.0)-AF1_(1.0));} // 3x MAD, FRACT
+ AF2 APCosF2(AF2 x){AF2 t=AFractF2(x*AF2_(0.5)+AF2_(0.75));return APSinF2(t*AF2_(2.0)-AF2_(1.0));}
+ AF2 APSinCosF1(AF1 x){AF1 t=AFractF1(x*AF1_(0.5)+AF1_(0.75));AF1 c=t*AF1_(2.0)-AF1_(1.0);return APSinF2(AF2(x,c));}
+ #endif
+//------------------------------------------------------------------------------------------------------------------------------
+ #ifdef A_HALF
+ // Cost of a packed {sin,cos} pair:
+ // - Native takes 16 clocks and 4 issue slots (no packed transcendentals).
+ // - Parabolic takes 8 clocks and 8 issue slots (only fract is non-packed).
+ AH1 APSinH1(AH1 x){AH1 m=abs(x);return x*m-x;}
+ AH2 APSinH2(AH2 x){AH2 m=abs(x);return x*m-x;} // AND,FMA
+ AH1 APCosH1(AH1 x){AH1 t=AFractH1(x*AH1_(0.5)+AH1_(0.75));return APSinH1(t*AH1_(2.0)-AH1_(1.0));}
+ AH2 APCosH2(AH2 x){AH2 t=AFractH2(x*AH2_(0.5)+AH2_(0.75));return APSinH2(t*AH2_(2.0)-AH2_(1.0));} // 3x FMA, 2xFRACT, AND
+ AH2 APSinCosH1(AH1 x){AH1 t=AFractH1(x*AH1_(0.5)+AH1_(0.75));AH1 c=t*AH1_(2.0)-AH1_(1.0);return APSinH2(AH2(x,c));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// [ZOL] ZERO ONE LOGIC
+//------------------------------------------------------------------------------------------------------------------------------
+// Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit.
+//------------------------------------------------------------------------------------------------------------------------------
+// 0 := false
+// 1 := true
+//------------------------------------------------------------------------------------------------------------------------------
+// AndNot(x,y) -> !(x&y) .... One op.
+// AndOr(x,y,z) -> (x&y)|z ... One op.
+// GtZero(x) -> x>0.0 ..... One op.
+// Sel(x,y,z) -> x?y:z ..... Two ops, has no precision loss.
+// Signed(x) -> x<0.0 ..... One op.
+// ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer.
+//------------------------------------------------------------------------------------------------------------------------------
+// OPTIMIZATION NOTES
+// ==================
+// - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'.
+// For example 'a.xy*k.xx+k.yy'.
+//==============================================================================================================================
+ #if 1
+ AU1 AZolAndU1(AU1 x,AU1 y){AU1 r=min(x,y);return r;} // AND of {0,1} values is their minimum.
+ AU2 AZolAndU2(AU2 x,AU2 y){AU2 r=min(x,y);return r;}
+ AU3 AZolAndU3(AU3 x,AU3 y){AU3 r=min(x,y);return r;}
+ AU4 AZolAndU4(AU4 x,AU4 y){AU4 r=min(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AZolNotU1(AU1 x){AU1 one=AU1_(1);return x^one;} // NOT of a {0,1} value flips bit 0.
+ AU2 AZolNotU2(AU2 x){AU2 one=AU2_(1);return x^one;}
+ AU3 AZolNotU3(AU3 x){AU3 one=AU3_(1);return x^one;}
+ AU4 AZolNotU4(AU4 x){AU4 one=AU4_(1);return x^one;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AU1 AZolOrU1(AU1 x,AU1 y){AU1 r=max(x,y);return r;} // OR of {0,1} values is their maximum.
+ AU2 AZolOrU2(AU2 x,AU2 y){AU2 r=max(x,y);return r;}
+ AU3 AZolOrU3(AU3 x,AU3 y){AU3 r=max(x,y);return r;}
+ AU4 AZolOrU4(AU4 x,AU4 y){AU4 r=max(x,y);return r;}
+//==============================================================================================================================
+ AU1 AZolF1ToU1(AF1 x){AU1 r=AU1(x);return r;} // Value conversion float -> uint.
+ AU2 AZolF2ToU2(AF2 x){AU2 r=AU2(x);return r;}
+ AU3 AZolF3ToU3(AF3 x){AU3 r=AU3(x);return r;}
+ AU4 AZolF4ToU4(AF4 x){AU4 r=AU4(x);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Costs 2 ops: denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled).
+ AU1 AZolNotF1ToU1(AF1 x){AF1 n=AF1_(1.0)-x;return AU1(n);}
+ AU2 AZolNotF2ToU2(AF2 x){AF2 n=AF2_(1.0)-x;return AU2(n);}
+ AU3 AZolNotF3ToU3(AF3 x){AF3 n=AF3_(1.0)-x;return AU3(n);}
+ AU4 AZolNotF4ToU4(AF4 x){AF4 n=AF4_(1.0)-x;return AU4(n);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolU1ToF1(AU1 x){AF1 r=AF1(x);return r;} // Value conversion uint -> float.
+ AF2 AZolU2ToF2(AU2 x){AF2 r=AF2(x);return r;}
+ AF3 AZolU3ToF3(AU3 x){AF3 r=AF3(x);return r;}
+ AF4 AZolU4ToF4(AU4 x){AF4 r=AF4(x);return r;}
+//==============================================================================================================================
+ AF1 AZolAndF1(AF1 x,AF1 y){AF1 r=min(x,y);return r;} // AND of {0.0,1.0} values is their minimum.
+ AF2 AZolAndF2(AF2 x,AF2 y){AF2 r=min(x,y);return r;}
+ AF3 AZolAndF3(AF3 x,AF3 y){AF3 r=min(x,y);return r;}
+ AF4 AZolAndF4(AF4 x,AF4 y){AF4 r=min(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 ASolAndNotF1(AF1 x,AF1 y){return (-x)*y+AF1_(1.0);} AF1 AZolAndNotF1(AF1 x,AF1 y){return ASolAndNotF1(x,y);} // 1-x*y. ASol*: legacy name, AZol*: alias.
+ AF2 ASolAndNotF2(AF2 x,AF2 y){return (-x)*y+AF2_(1.0);} AF2 AZolAndNotF2(AF2 x,AF2 y){return ASolAndNotF2(x,y);}
+ AF3 ASolAndNotF3(AF3 x,AF3 y){return (-x)*y+AF3_(1.0);} AF3 AZolAndNotF3(AF3 x,AF3 y){return ASolAndNotF3(x,y);}
+ AF4 ASolAndNotF4(AF4 x,AF4 y){return (-x)*y+AF4_(1.0);} AF4 AZolAndNotF4(AF4 x,AF4 y){return ASolAndNotF4(x,y);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){AF1 t=x*y+z;return ASatF1(t);} // (x&y)|z as a saturated multiply-add.
+ AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){AF2 t=x*y+z;return ASatF2(t);}
+ AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){AF3 t=x*y+z;return ASatF3(t);}
+ AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){AF4 t=x*y+z;return ASatF4(t);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolGtZeroF1(AF1 x){AF1 t=x*AF1_(A_INFP_F);return ASatF1(t);} // Scale by A_INFP_F, then saturate.
+ AF2 AZolGtZeroF2(AF2 x){AF2 t=x*AF2_(A_INFP_F);return ASatF2(t);}
+ AF3 AZolGtZeroF3(AF3 x){AF3 t=x*AF3_(A_INFP_F);return ASatF3(t);}
+ AF4 AZolGtZeroF4(AF4 x){AF4 t=x*AF4_(A_INFP_F);return ASatF4(t);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolNotF1(AF1 x){AF1 one=AF1_(1.0);return one-x;} // NOT as 1-x.
+ AF2 AZolNotF2(AF2 x){AF2 one=AF2_(1.0);return one-x;}
+ AF3 AZolNotF3(AF3 x){AF3 one=AF3_(1.0);return one-x;}
+ AF4 AZolNotF4(AF4 x){AF4 one=AF4_(1.0);return one-x;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolOrF1(AF1 x,AF1 y){AF1 r=max(x,y);return r;} // OR of {0.0,1.0} values is their maximum.
+ AF2 AZolOrF2(AF2 x,AF2 y){AF2 r=max(x,y);return r;}
+ AF3 AZolOrF3(AF3 x,AF3 y){AF3 r=max(x,y);return r;}
+ AF4 AZolOrF4(AF4 x,AF4 y){AF4 r=max(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 zTerm=(-x)*z+z;AF1 picked=x*y+zTerm;return picked;} // x?y:z as two multiply-adds.
+ AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 zTerm=(-x)*z+z;AF2 picked=x*y+zTerm;return picked;}
+ AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 zTerm=(-x)*z+z;AF3 picked=x*y+zTerm;return picked;}
+ AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 zTerm=(-x)*z+z;AF4 picked=x*y+zTerm;return picked;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolSignedF1(AF1 x){AF1 t=x*AF1_(A_INFN_F);return ASatF1(t);} // Scale by A_INFN_F, then saturate.
+ AF2 AZolSignedF2(AF2 x){AF2 t=x*AF2_(A_INFN_F);return ASatF2(t);}
+ AF3 AZolSignedF3(AF3 x){AF3 t=x*AF3_(A_INFN_F);return ASatF3(t);}
+ AF4 AZolSignedF4(AF4 x){AF4 t=x*AF4_(A_INFN_F);return ASatF4(t);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF1 AZolZeroPassF1(AF1 x,AF1 y){AU1 xb=AU1_AF1(x);AU1 yb=AU1_AF1(y);return AF1_AU1((xb!=AU1_(0))?AU1_(0):yb);} // Bitwise x?0:y.
+ AF2 AZolZeroPassF2(AF2 x,AF2 y){AU2 xb=AU2_AF2(x);AU2 yb=AU2_AF2(y);return AF2_AU2((xb!=AU2_(0))?AU2_(0):yb);}
+ AF3 AZolZeroPassF3(AF3 x,AF3 y){AU3 xb=AU3_AF3(x);AU3 yb=AU3_AF3(y);return AF3_AU3((xb!=AU3_(0))?AU3_(0):yb);}
+ AF4 AZolZeroPassF4(AF4 x,AF4 y){AU4 xb=AU4_AF4(x);AU4 yb=AU4_AF4(y);return AF4_AU4((xb!=AU4_(0))?AU4_(0):yb);}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+ AW1 AZolAndW1(AW1 x,AW1 y){AW1 r=min(x,y);return r;} // AND of {0,1} values is their minimum.
+ AW2 AZolAndW2(AW2 x,AW2 y){AW2 r=min(x,y);return r;}
+ AW3 AZolAndW3(AW3 x,AW3 y){AW3 r=min(x,y);return r;}
+ AW4 AZolAndW4(AW4 x,AW4 y){AW4 r=min(x,y);return r;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AW1 AZolNotW1(AW1 x){AW1 one=AW1_(1);return x^one;} // NOT of a {0,1} value flips bit 0.
+ AW2 AZolNotW2(AW2 x){AW2 one=AW2_(1);return x^one;}
+ AW3 AZolNotW3(AW3 x){AW3 one=AW3_(1);return x^one;}
+ AW4 AZolNotW4(AW4 x){AW4 one=AW4_(1);return x^one;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AW1 AZolOrW1(AW1 x,AW1 y){AW1 r=max(x,y);return r;} // OR of {0,1} values is their maximum.
+ AW2 AZolOrW2(AW2 x,AW2 y){AW2 r=max(x,y);return r;}
+ AW3 AZolOrW3(AW3 x,AW3 y){AW3 r=max(x,y);return r;}
+ AW4 AZolOrW4(AW4 x,AW4 y){AW4 r=max(x,y);return r;}
+//==============================================================================================================================
+ // Uses denormal trick.
+ // Half zero-one value -> AW* zero-one value: x is multiplied by the half whose integer view is 1, and the product is
+ // converted back through AW*_AH*().
+ // NOTE(review): AH*_AW*/AW*_AH* are defined outside this chunk and are presumably bit-casts; the trick then needs FP16
+ // denormals to be preserved (see "FP16/INT16 ALIASING DENORMAL" in the REFERENCE section).
+ AW1 AZolH1ToW1(AH1 x){return AW1_AH1(x*AH1_AW1(AW1_(1)));}
+ AW2 AZolH2ToW2(AH2 x){return AW2_AH2(x*AH2_AW2(AW2_(1)));}
+ AW3 AZolH3ToW3(AH3 x){return AW3_AH3(x*AH3_AW3(AW3_(1)));}
+ AW4 AZolH4ToW4(AH4 x){return AW4_AH4(x*AH4_AW4(AW4_(1)));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // AMD arch lacks a packed conversion opcode.
+ // AW* zero-one value -> half zero-one value: x is multiplied by the integer view of half 1.0 and the product is
+ // converted back through AH*_AW*(), giving 0.0 for x==0 and 1.0 for x==1.
+ AH1 AZolW1ToH1(AW1 x){return AH1_AW1(x*AW1_AH1(AH1_(1.0)));}
+ AH2 AZolW2ToH2(AW2 x){return AH2_AW2(x*AW2_AH2(AH2_(1.0)));}
+ // The 3- and 4-component variants carry a wrong component count in their names; the names are kept for existing callers.
+ AH3 AZolW1ToH3(AW3 x){return AH3_AW3(x*AW3_AH3(AH3_(1.0)));}
+ AH4 AZolW2ToH4(AW4 x){return AH4_AW4(x*AW4_AH4(AH4_(1.0)));}
+ // Consistently named entry points (matching AZolH3ToW3/AZolH4ToW4) that forward to the variants above.
+ AH3 AZolW3ToH3(AW3 x){return AZolW1ToH3(x);}
+ AH4 AZolW4ToH4(AW4 x){return AZolW2ToH4(x);}
+//==============================================================================================================================
+ // Zero-one AND on half values: for operands that are exactly 0.0 or 1.0, min(x,y) is 1.0 only when both are 1.0.
+ AH1 AZolAndH1(AH1 x,AH1 y){return min(x,y);}
+ AH2 AZolAndH2(AH2 x,AH2 y){return min(x,y);}
+ AH3 AZolAndH3(AH3 x,AH3 y){return min(x,y);}
+ AH4 AZolAndH4(AH4 x,AH4 y){return min(x,y);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Computes (-x)*y+1.0, i.e. 1-x*y, as a single multiply-add on zero-one half values.
+ // The 'ASol' prefix is a misspelling of the 'AZol' family prefix; the names are kept for existing callers.
+ AH1 ASolAndNotH1(AH1 x,AH1 y){return (-x)*y+AH1_(1.0);}
+ AH2 ASolAndNotH2(AH2 x,AH2 y){return (-x)*y+AH2_(1.0);}
+ AH3 ASolAndNotH3(AH3 x,AH3 y){return (-x)*y+AH3_(1.0);}
+ AH4 ASolAndNotH4(AH4 x,AH4 y){return (-x)*y+AH4_(1.0);}
+ // Consistently named entry points that forward to the functions above.
+ AH1 AZolAndNotH1(AH1 x,AH1 y){return ASolAndNotH1(x,y);}
+ AH2 AZolAndNotH2(AH2 x,AH2 y){return ASolAndNotH2(x,y);}
+ AH3 AZolAndNotH3(AH3 x,AH3 y){return ASolAndNotH3(x,y);}
+ AH4 AZolAndNotH4(AH4 x,AH4 y){return ASolAndNotH4(x,y);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // (x AND y) OR z for zero-one half values: the multiply-add x*y+z can reach 2.0, so it is saturated back into {0,1}.
+ AH1 AZolAndOrH1(AH1 x,AH1 y,AH1 z){return ASatH1(x*y+z);}
+ AH2 AZolAndOrH2(AH2 x,AH2 y,AH2 z){return ASatH2(x*y+z);}
+ AH3 AZolAndOrH3(AH3 x,AH3 y,AH3 z){return ASatH3(x*y+z);}
+ AH4 AZolAndOrH4(AH4 x,AH4 y,AH4 z){return ASatH4(x*y+z);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Multiplies x by A_INFP_H and saturates the product.
+ // NOTE(review): A_INFP_H is defined outside this chunk and is presumably +INF, which would make this 1.0 for x>0 and
+ // 0.0 for x<0.
+ AH1 AZolGtZeroH1(AH1 x){return ASatH1(x*AH1_(A_INFP_H));}
+ AH2 AZolGtZeroH2(AH2 x){return ASatH2(x*AH2_(A_INFP_H));}
+ AH3 AZolGtZeroH3(AH3 x){return ASatH3(x*AH3_(A_INFP_H));}
+ AH4 AZolGtZeroH4(AH4 x){return ASatH4(x*AH4_(A_INFP_H));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Zero-one NOT: 1.0-x maps 0.0 to 1.0 and 1.0 to 0.0.
+ AH1 AZolNotH1(AH1 x){return AH1_(1.0)-x;}
+ AH2 AZolNotH2(AH2 x){return AH2_(1.0)-x;}
+ AH3 AZolNotH3(AH3 x){return AH3_(1.0)-x;}
+ AH4 AZolNotH4(AH4 x){return AH4_(1.0)-x;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Zero-one OR: max(x,y) is 1.0 when either operand is 1.0.
+ AH1 AZolOrH1(AH1 x,AH1 y){return max(x,y);}
+ AH2 AZolOrH2(AH2 x,AH2 y){return max(x,y);}
+ AH3 AZolOrH3(AH3 x,AH3 y){return max(x,y);}
+ AH4 AZolOrH4(AH4 x,AH4 y){return max(x,y);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Zero-one select on half values: computes x*y+((-x)*z+z) as two multiply-adds.
+ // For x==1 and finite z the second term cancels to 0 and the result is y; for x==0 and finite y the result is z.
+ AH1 AZolSelH1(AH1 x,AH1 y,AH1 z){
+  AH1 zWhenOff=(-x)*z+z;
+  return x*y+zWhenOff;}
+ AH2 AZolSelH2(AH2 x,AH2 y,AH2 z){
+  AH2 zWhenOff=(-x)*z+z;
+  return x*y+zWhenOff;}
+ AH3 AZolSelH3(AH3 x,AH3 y,AH3 z){
+  AH3 zWhenOff=(-x)*z+z;
+  return x*y+zWhenOff;}
+ AH4 AZolSelH4(AH4 x,AH4 y,AH4 z){
+  AH4 zWhenOff=(-x)*z+z;
+  return x*y+zWhenOff;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Multiplies x by A_INFN_H and saturates the product.
+ // NOTE(review): A_INFN_H is defined outside this chunk and is presumably -INF, which would make this 1.0 for x<0 and
+ // 0.0 for x>0.
+ AH1 AZolSignedH1(AH1 x){return ASatH1(x*AH1_(A_INFN_H));}
+ AH2 AZolSignedH2(AH2 x){return ASatH2(x*AH2_(A_INFN_H));}
+ AH3 AZolSignedH3(AH3 x){return ASatH3(x*AH3_(A_INFN_H));}
+ AH4 AZolSignedH4(AH4 x){return ASatH4(x*AH4_(A_INFN_H));}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// COLOR CONVERSIONS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are all linear to/from some other space (where 'linear' has been shortened out of the function name).
+// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'.
+// These are branch free implementations.
+// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion.
+//------------------------------------------------------------------------------------------------------------------------------
+// TRANSFER FUNCTIONS
+// ==================
+// 709 ..... Rec709 used for some HDTVs
+// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native
+// Pq ...... PQ native for HDR10
+// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type
+// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations)
+// Three ... Gamma 3.0, less fast, but good for HDR.
+//------------------------------------------------------------------------------------------------------------------------------
+// KEEPING TO SPEC
+// ===============
+// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+// Also there is a slight step in the transition regions.
+// Precision of the coefficients in the spec being the likely cause.
+// Main usage case of the sRGB code is to do the linear->sRGB conversion in a compute shader before store.
+// This is to work around lack of hardware (typically only ROP does the conversion for free).
+// To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free).
+// So this header keeps with the spec.
+// For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear.
+// Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear.
+//------------------------------------------------------------------------------------------------------------------------------
+// FOR PQ
+// ======
+// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2.
+// All constants are only specified to FP32 precision.
+// External PQ source reference,
+// - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl
+//------------------------------------------------------------------------------------------------------------------------------
+// PACKED VERSIONS
+// ===============
+// These are the A*H2() functions.
+// There are no PQ functions as FP16 seemed to not have enough precision for the conversion.
+// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors.
+// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least).
+//------------------------------------------------------------------------------------------------------------------------------
+// NOTES
+// =====
+// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case.
+//==============================================================================================================================
+ #if 1
+ // Linear -> Rec.709 encoding.
+ // j = {encoded segment threshold 0.018*4.5, linear-segment slope 4.5, curve exponent 0.45}, k = {curve scale, curve offset}.
+ // The three clamp() operands are the threshold, the linear segment c*4.5 and the curved segment 1.099*c^0.45-0.099.
+ // NOTE(review): this appears to use clamp() as a 3-input median (branch-free segment pick); that relies on clamp()
+ // behaving as a median even when its 'min' operand exceeds its 'max' operand. Confirm on every target compiler.
+ AF1 ATo709F1(AF1 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
+ return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );}
+ AF2 ATo709F2(AF2 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
+ return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );}
+ AF3 ATo709F3(AF3 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099);
+ return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear -> gamma encoding: raises c to the power rcpX (the same scalar exponent is applied to every component).
+ // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma().
+ AF1 AToGammaF1(AF1 c,AF1 rcpX){return pow(c,AF1_(rcpX));}
+ AF2 AToGammaF2(AF2 c,AF1 rcpX){return pow(c,AF2_(rcpX));}
+ AF3 AToGammaF3(AF3 c,AF1 rcpX){return pow(c,AF3_(rcpX));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear -> PQ encoding: p=x^0.159302, result=((0.835938+18.8516*p)/(1.0+18.6875*p))^78.8438.
+ // Input and output are {0.0-1.0} (see "FOR PQ" above).
+ // The AF2 and AF3 variants were published as overloads of 'AToPqF1'; those overloads are kept for existing callers.
+ AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302));
+ return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));}
+ AF2 AToPqF1(AF2 x){AF2 p=pow(x,AF2_(0.159302));
+ return pow((AF2_(0.835938)+AF2_(18.8516)*p)/(AF2_(1.0)+AF2_(18.6875)*p),AF2_(78.8438));}
+ AF3 AToPqF1(AF3 x){AF3 p=pow(x,AF3_(0.159302));
+ return pow((AF3_(0.835938)+AF3_(18.8516)*p)/(AF3_(1.0)+AF3_(18.6875)*p),AF3_(78.8438));}
+ // Consistently named entry points (matching the A*F2/A*F3 convention used by the other conversions).
+ AF2 AToPqF2(AF2 x){return AToPqF1(x);}
+ AF3 AToPqF3(AF3 x){return AToPqF1(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear -> sRGB encoding.
+ // j = {encoded segment threshold 0.0031308*12.92, linear-segment slope 12.92, curve exponent 1/2.4}, k = {curve scale, offset}.
+ // The three clamp() operands are the threshold, the linear segment c*12.92 and the curved segment 1.055*c^(1/2.4)-0.055.
+ // NOTE(review): this appears to use clamp() as a 3-input median (branch-free segment pick); that relies on clamp()
+ // behaving as a median even when its 'min' operand exceeds its 'max' operand. Confirm on every target compiler.
+ AF1 AToSrgbF1(AF1 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);
+ return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );}
+ AF2 AToSrgbF2(AF2 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);
+ return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );}
+ AF3 AToSrgbF3(AF3 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055);
+ return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear -> gamma 2.0 encoding: a square root.
+ AF1 AToTwoF1(AF1 c){return sqrt(c);}
+ AF2 AToTwoF2(AF2 c){return sqrt(c);}
+ AF3 AToTwoF3(AF3 c){return sqrt(c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Linear -> gamma 3.0 encoding: a cube root computed as pow(c,1/3).
+ AF1 AToThreeF1(AF1 c){return pow(c,AF1_(1.0/3.0));}
+ AF2 AToThreeF2(AF2 c){return pow(c,AF2_(1.0/3.0));}
+ AF3 AToThreeF3(AF3 c){return pow(c,AF3_(1.0/3.0));}
+ #endif
+//==============================================================================================================================
+ #if 1
+ // Rec.709 encoding -> linear.
+ // Unfortunately median won't work here.
+ // j = {encoded segment threshold 0.081, linear-segment slope 1/4.5, curve exponent 1/0.45}, k = {curve scale, curve offset}.
+ // AZolSigned*(c-j.x) picks the linear segment while the ENCODED input is below the threshold. The threshold must be
+ // in the encoded domain: 0.081 (=0.018*4.5, the value ATo709F*() uses), not the linear-domain 0.018. With 0.018,
+ // encoded inputs between 0.018 and 0.081 were decoded with the curved segment instead of the linear one.
+ AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+ return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));}
+ AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+ return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+ AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099);
+ return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma encoding -> linear: raises c to the power x (the same scalar exponent is applied to every component).
+ AF1 AFromGammaF1(AF1 c,AF1 x){return pow(c,AF1_(x));}
+ AF2 AFromGammaF2(AF2 c,AF1 x){return pow(c,AF2_(x));}
+ AF3 AFromGammaF3(AF3 c,AF1 x){return pow(c,AF3_(x));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // PQ encoding -> linear: p=x^0.0126833, result=(sat(p-0.835938)/(18.8516-18.6875*p))^6.27739.
+ // Input and output are {0.0-1.0} (see "FOR PQ" above).
+ // The AF2 and AF3 variants were published as overloads of 'AFromPqF1'; those overloads are kept for existing callers.
+ AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833));
+ return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));}
+ AF2 AFromPqF1(AF2 x){AF2 p=pow(x,AF2_(0.0126833));
+ return pow(ASatF2(p-AF2_(0.835938))/(AF2_(18.8516)-AF2_(18.6875)*p),AF2_(6.27739));}
+ AF3 AFromPqF1(AF3 x){AF3 p=pow(x,AF3_(0.0126833));
+ return pow(ASatF3(p-AF3_(0.835938))/(AF3_(18.8516)-AF3_(18.6875)*p),AF3_(6.27739));}
+ // Consistently named entry points (matching the A*F2/A*F3 convention used by the other conversions).
+ AF2 AFromPqF2(AF2 x){return AFromPqF1(x);}
+ AF3 AFromPqF3(AF3 x){return AFromPqF1(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // sRGB encoding -> linear.
+ // Unfortunately median won't work here.
+ // j = {encoded segment threshold 0.04045, linear-segment slope 1/12.92, curve exponent 2.4}, k = {curve scale, curve offset}.
+ // AZolSigned*(c-j.x) picks the linear segment while the ENCODED input is below the threshold. The threshold must be
+ // in the encoded domain: 0.04045 (=0.0031308*12.92, the value AToSrgbF*() uses), not the linear-domain 0.0031308.
+ // With 0.0031308, encoded inputs between 0.0031308 and 0.04045 were decoded with the curved segment.
+ AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+ return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));}
+ AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+ return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+ AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055);
+ return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma 2.0 encoding -> linear: a square (the inverse of the sqrt in AToTwoF*()).
+ AF1 AFromTwoF1(AF1 c){return c*c;}
+ AF2 AFromTwoF2(AF2 c){return c*c;}
+ AF3 AFromTwoF3(AF3 c){return c*c;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Gamma 3.0 encoding -> linear: a cube (the inverse of the pow(c,1/3) in AToThreeF*()).
+ AF1 AFromThreeF1(AF1 c){return c*c*c;}
+ AF2 AFromThreeF2(AF2 c){return c*c*c;}
+ AF3 AFromThreeF3(AF3 c){return c*c*c;}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Half-precision linear -> Rec.709 encoding; same constants and structure as ATo709F*().
+ // NOTE(review): like the FP32 version this appears to use clamp() as a 3-input median; confirm on every target compiler.
+ AH1 ATo709H1(AH1 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099);
+ return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );}
+ AH2 ATo709H2(AH2 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099);
+ return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );}
+ AH3 ATo709H3(AH3 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099);
+ return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision linear -> gamma encoding: raises c to the power rcpX (1/x of the matching AFromGammaH*() call).
+ AH1 AToGammaH1(AH1 c,AH1 rcpX){return pow(c,AH1_(rcpX));}
+ AH2 AToGammaH2(AH2 c,AH1 rcpX){return pow(c,AH2_(rcpX));}
+ AH3 AToGammaH3(AH3 c,AH1 rcpX){return pow(c,AH3_(rcpX));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision linear -> sRGB encoding; same constants and structure as AToSrgbF*().
+ AH1 AToSrgbH1(AH1 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);
+ return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );}
+ AH2 AToSrgbH2(AH2 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);
+ return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );}
+ AH3 AToSrgbH3(AH3 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055);
+ return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision linear -> gamma 2.0 encoding: a square root.
+ AH1 AToTwoH1(AH1 c){return sqrt(c);}
+ AH2 AToTwoH2(AH2 c){return sqrt(c);}
+ AH3 AToTwoH3(AH3 c){return sqrt(c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision linear -> gamma 3.0 encoding: a cube root computed as pow(c,1/3).
+ // These half variants were published with an 'F' suffix (as overloads of the FP32 names); kept for existing callers.
+ AH1 AToThreeF1(AH1 c){return pow(c,AH1_(1.0/3.0));}
+ AH2 AToThreeF2(AH2 c){return pow(c,AH2_(1.0/3.0));}
+ AH3 AToThreeF3(AH3 c){return pow(c,AH3_(1.0/3.0));}
+ // Consistently named entry points (matching AFromThreeH1..3).
+ AH1 AToThreeH1(AH1 c){return pow(c,AH1_(1.0/3.0));}
+ AH2 AToThreeH2(AH2 c){return pow(c,AH2_(1.0/3.0));}
+ AH3 AToThreeH3(AH3 c){return pow(c,AH3_(1.0/3.0));}
+ #endif
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Half-precision Rec.709 encoding -> linear.
+ // j = {encoded segment threshold 0.081, linear-segment slope 1/4.5, curve exponent 1/0.45}, k = {curve scale, curve offset}.
+ // The threshold is compared against the ENCODED input, so it must be 0.081 (=0.018*4.5, the value ATo709H*() uses),
+ // not the linear-domain 0.018, which sent encoded inputs between 0.018 and 0.081 to the curved segment.
+ AH1 AFrom709H1(AH1 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099);
+ return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));}
+ AH2 AFrom709H2(AH2 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099);
+ return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+ AH3 AFrom709H3(AH3 c){AH3 j=AH3(0.081,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099);
+ return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision gamma encoding -> linear: raises c to the power x (the same scalar exponent for every component).
+ AH1 AFromGammaH1(AH1 c,AH1 x){return pow(c,AH1_(x));}
+ AH2 AFromGammaH2(AH2 c,AH1 x){return pow(c,AH2_(x));}
+ AH3 AFromGammaH3(AH3 c,AH1 x){return pow(c,AH3_(x));}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision sRGB encoding -> linear.
+ // j = {encoded segment threshold 0.04045, linear-segment slope 1/12.92, curve exponent 2.4}, k = {curve scale, curve offset}.
+ // The threshold is compared against the ENCODED input, so it must be 0.04045 (=0.0031308*12.92, the value
+ // AToSrgbH*() uses), not the linear-domain 0.0031308.
+ // The 'AHromSrgbF*' names are misspellings of 'AFromSrgbH*'; they are kept for existing callers.
+ AH1 AHromSrgbF1(AH1 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055);
+ return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));}
+ AH2 AHromSrgbF2(AH2 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055);
+ return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));}
+ AH3 AHromSrgbF3(AH3 c){AH3 j=AH3(0.04045,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055);
+ return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));}
+ // Consistently named entry points (matching AFrom709H1..3) that forward to the functions above.
+ AH1 AFromSrgbH1(AH1 c){return AHromSrgbF1(c);}
+ AH2 AFromSrgbH2(AH2 c){return AHromSrgbF2(c);}
+ AH3 AFromSrgbH3(AH3 c){return AHromSrgbF3(c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision gamma 2.0 encoding -> linear: a square.
+ AH1 AFromTwoH1(AH1 c){return c*c;}
+ AH2 AFromTwoH2(AH2 c){return c*c;}
+ AH3 AFromTwoH3(AH3 c){return c*c;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Half-precision gamma 3.0 encoding -> linear: a cube.
+ AH1 AFromThreeH1(AH1 c){return c*c*c;}
+ AH2 AFromThreeH2(AH2 c){return c*c*c;}
+ AH3 AFromThreeH3(AH3 c){return c*c*c;}
+ #endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CS REMAP
+//==============================================================================================================================
+ // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear.
+ // Bit layout of the 6-bit lane index 'a' (bit 5 on the left), showing which bits feed x and which feed y:
+ // 543210
+ // ======
+ // ..xxx.
+ // yy...y
+ // x is the 3-bit field starting at bit 1; y is the 3-bit field starting at bit 3 with its low bit replaced by bit 0.
+ // NOTE(review): ABfe/ABfiM are defined outside this chunk and are presumably bitfield-extract / masked bitfield-insert.
+ AU2 ARmp8x8(AU1 a){return AU2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));}
+//==============================================================================================================================
+ // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions.
+ // Bit layout of the 6-bit lane index 'a' (bit 5 on the left), showing which bits feed x and which feed y:
+ // 543210
+ // ======
+ // .xx..x
+ // y..yy.
+ // x is the 3-bit field starting at bit 2 with its low bit replaced by bit 0;
+ // y is the 3-bit field starting at bit 3 with its low two bits replaced by bits 1..2.
+ // NOTE(review): ABfe/ABfiM are defined outside this chunk and are presumably bitfield-extract / masked bitfield-insert.
+ // Details,
+ // LANE TO 8x8 MAPPING
+ // ===================
+ // 00 01 08 09 10 11 18 19
+ // 02 03 0a 0b 12 13 1a 1b
+ // 04 05 0c 0d 14 15 1c 1d
+ // 06 07 0e 0f 16 17 1e 1f
+ // 20 21 28 29 30 31 38 39
+ // 22 23 2a 2b 32 33 3a 3b
+ // 24 25 2c 2d 34 35 3c 3d
+ // 26 27 2e 2f 36 37 3e 3f
+ AU2 ARmpRed8x8(AU1 a){return AU2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));}
+//==============================================================================================================================
+ #ifdef A_HALF
+ // Same bit remaps as ARmp8x8() and ARmpRed8x8(), with the coordinate pair constructed as AW2 instead of AU2.
+ AW2 ARmp8x8H(AU1 a){return AW2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));}
+ AW2 ARmpRed8x8H(AU1 a){return AW2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));}
+ #endif
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// REFERENCE
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// IEEE FLOAT RULES
+// ================
+// - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1
+// - {+/-}0 * {+/-}INF = NaN
+// - -INF + (+INF) = NaN
+// - {+/-}0 / {+/-}0 = NaN
+// - {+/-}INF / {+/-}INF = NaN
+// - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN)
+// - 0 == -0
+// - 4/0 = +INF
+// - 4/-0 = -INF
+// - 4+INF = +INF
+// - 4-INF = -INF
+// - 4*(+INF) = +INF
+// - 4*(-INF) = -INF
+// - -4*(+INF) = -INF
+// - sqrt(+INF) = +INF
+//------------------------------------------------------------------------------------------------------------------------------
+// FP16 ENCODING
+// =============
+// fedcba9876543210
+// ----------------
+// ......mmmmmmmmmm 10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals)
+// .eeeee.......... 5-bit exponent
+// .00000.......... denormals
+// .00001.......... -14 exponent
+// .11110.......... 15 exponent
+// .111110000000000 infinity
+// .11111nnnnnnnnnn NaN with n!=0
+// s............... sign
+//------------------------------------------------------------------------------------------------------------------------------
+// FP16/INT16 ALIASING DENORMAL
+// ============================
+// 11-bit unsigned integers alias with half float denormal/normal values,
+// 1 = 2^(-24) = 1/16777216 ....................... first denormal value
+// 2 = 2^(-23)
+// ...
+// 1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value
+// 1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers
+// 2047 .............................................. last normal value that still maps to integers
+// Scaling limits,
+// 2^15 = 32768 ...................................... largest power of 2 scaling
+// Largest pow2 conversion mapping is at *32768,
+// 1 : 2^(-9) = 1/512
+// 2 : 1/256
+// 4 : 1/128
+// 8 : 1/64
+// 16 : 1/32
+// 32 : 1/16
+// 64 : 1/8
+// 128 : 1/4
+// 256 : 1/2
+// 512 : 1
+// 1024 : 2
+// 2047 : a little less than 4
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+// GPU/CPU PORTABILITY
+//
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This is the GPU implementation.
+// See the CPU implementation for docs.
+//==============================================================================================================================
+#ifdef A_GPU
+ // GPU build: booleans map to the native literals, and A_STATIC expands to nothing.
+ #define A_TRUE true
+ #define A_FALSE false
+ #define A_STATIC
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY
+//==============================================================================================================================
+ // Return-type spellings: on the GPU a vector is returned by value, so ret<T> is just <T>.
+ #define retAD2 AD2
+ #define retAD3 AD3
+ #define retAD4 AD4
+ #define retAF2 AF2
+ #define retAF3 AF3
+ #define retAF4 AF4
+ #define retAL2 AL2
+ #define retAL3 AL3
+ #define retAL4 AL4
+ #define retAU2 AU2
+ #define retAU3 AU3
+ #define retAU4 AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Input-parameter spellings: expand to the 'in' qualifier followed by the vector type.
+ #define inAD2 in AD2
+ #define inAD3 in AD3
+ #define inAD4 in AD4
+ #define inAF2 in AF2
+ #define inAF3 in AF3
+ #define inAF4 in AF4
+ #define inAL2 in AL2
+ #define inAL3 in AL3
+ #define inAL4 in AL4
+ #define inAU2 in AU2
+ #define inAU3 in AU3
+ #define inAU4 in AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // In/out-parameter spellings: expand to the 'inout' qualifier followed by the vector type.
+ #define inoutAD2 inout AD2
+ #define inoutAD3 inout AD3
+ #define inoutAD4 inout AD4
+ #define inoutAF2 inout AF2
+ #define inoutAF3 inout AF3
+ #define inoutAF4 inout AF4
+ #define inoutAL2 inout AL2
+ #define inoutAL3 inout AL3
+ #define inoutAL4 inout AL4
+ #define inoutAU2 inout AU2
+ #define inoutAU3 inout AU3
+ #define inoutAU4 inout AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Output-parameter spellings: expand to the 'out' qualifier followed by the vector type.
+ #define outAD2 out AD2
+ #define outAD3 out AD3
+ #define outAD4 out AD4
+ #define outAF2 out AF2
+ #define outAF3 out AF3
+ #define outAF4 out AF4
+ #define outAL2 out AL2
+ #define outAL3 out AL3
+ #define outAL4 out AL4
+ #define outAU2 out AU2
+ #define outAU3 out AU3
+ #define outAU4 out AU4
+//------------------------------------------------------------------------------------------------------------------------------
+ // Variable declarations: var<T>(x) declares a variable named x of vector type <T>.
+ #define varAD2(x) AD2 x
+ #define varAD3(x) AD3 x
+ #define varAD4(x) AD4 x
+ #define varAF2(x) AF2 x
+ #define varAF3(x) AF3 x
+ #define varAF4(x) AF4 x
+ #define varAL2(x) AL2 x
+ #define varAL3(x) AL3 x
+ #define varAL4(x) AL4 x
+ #define varAU2(x) AU2 x
+ #define varAU3(x) AU3 x
+ #define varAU4(x) AU4 x
+//------------------------------------------------------------------------------------------------------------------------------
+ // Initializers: init<T>(...) expands to the vector constructor <T>(...).
+ #define initAD2(x,y) AD2(x,y)
+ #define initAD3(x,y,z) AD3(x,y,z)
+ #define initAD4(x,y,z,w) AD4(x,y,z,w)
+ #define initAF2(x,y) AF2(x,y)
+ #define initAF3(x,y,z) AF3(x,y,z)
+ #define initAF4(x,y,z,w) AF4(x,y,z,w)
+ #define initAL2(x,y) AL2(x,y)
+ #define initAL3(x,y,z) AL3(x,y,z)
+ #define initAL4(x,y,z,w) AL4(x,y,z,w)
+ #define initAU2(x,y) AU2(x,y)
+ #define initAU3(x,y,z) AU3(x,y,z)
+ #define initAU4(x,y,z,w) AU4(x,y,z,w)
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// SCALAR RETURN OPS
+//==============================================================================================================================
+ // Each macro forwards to the shader built-in of the same meaning. The D1/F1 forms convert their arguments with
+ // AD1()/AF1() first; the AMax*/AMin* forms pass their arguments through unchanged.
+ #define AAbsD1(a) abs(AD1(a))
+ #define AAbsF1(a) abs(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ACosD1(a) cos(AD1(a))
+ #define ACosF1(a) cos(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ADotD2(a,b) dot(AD2(a),AD2(b))
+ #define ADotD3(a,b) dot(AD3(a),AD3(b))
+ #define ADotD4(a,b) dot(AD4(a),AD4(b))
+ #define ADotF2(a,b) dot(AF2(a),AF2(b))
+ #define ADotF3(a,b) dot(AF3(a),AF3(b))
+ #define ADotF4(a,b) dot(AF4(a),AF4(b))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AExp2D1(a) exp2(AD1(a))
+ #define AExp2F1(a) exp2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define AFloorD1(a) floor(AD1(a))
+ #define AFloorF1(a) floor(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ALog2D1(a) log2(AD1(a))
+ #define ALog2F1(a) log2(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ // No conversion is applied here: both arguments must already have the same type.
+ #define AMaxD1(a,b) max(a,b)
+ #define AMaxF1(a,b) max(a,b)
+ #define AMaxL1(a,b) max(a,b)
+ #define AMaxU1(a,b) max(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ // No conversion is applied here: both arguments must already have the same type.
+ #define AMinD1(a,b) min(a,b)
+ #define AMinF1(a,b) min(a,b)
+ #define AMinL1(a,b) min(a,b)
+ #define AMinU1(a,b) min(a,b)
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASinD1(a) sin(AD1(a))
+ #define ASinF1(a) sin(AF1(a))
+//------------------------------------------------------------------------------------------------------------------------------
+ #define ASqrtD1(a) sqrt(AD1(a))
+ #define ASqrtF1(a) sqrt(AF1(a))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// SCALAR RETURN OPS - DEPENDENT
+//==============================================================================================================================
+ // pow(a,b) with both operands converted to the macro's precision.
+ // APowD1 converts the exponent with AD1() so base and exponent share one type; it previously used AF1(), which
+ // truncated a double exponent to 32-bit float and mixed operand types.
+ // NOTE(review): confirm that the target shading language provides a double-precision pow() at all.
+ #define APowD1(a,b) pow(AD1(a),AD1(b))
+ #define APowF1(a,b) pow(AF1(a),AF1(b))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// VECTOR OPS
+//------------------------------------------------------------------------------------------------------------------------------
+// These are added as needed for production or prototyping, so not necessarily a complete set.
+// They follow a convention of taking in a destination and also returning the destination value to increase utility.
+//==============================================================================================================================
+ #ifdef A_DUBL
+ AD2 opAAbsD2(outAD2 d,inAD2 a){return d=abs(a);} // opAAbs*: d=abs(a).
+ AD3 opAAbsD3(outAD3 d,inAD3 a){return d=abs(a);}
+ AD4 opAAbsD4(outAD4 d,inAD4 a){return d=abs(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){return d=a+b;} // opAAdd*: d=a+b.
+ AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){return d=a+b;}
+ AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){return d=a+b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){return d=a+AD2_(b);} // opAAddOne*: scalar 'b' is passed through AD?_() first.
+ AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){return d=a+AD3_(b);}
+ AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){return d=a+AD4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opACpyD2(outAD2 d,inAD2 a){return d=a;} // opACpy*: d=a.
+ AD3 opACpyD3(outAD3 d,inAD3 a){return d=a;}
+ AD4 opACpyD4(outAD4 d,inAD4 a){return d=a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){return d=ALerpD2(a,b,c);} // opALerp*: d=ALerpD?(a,b,c).
+ AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){return d=ALerpD3(a,b,c);}
+ AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){return d=ALerpD4(a,b,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){return d=ALerpD2(a,b,AD2_(c));} // opALerpOne*: scalar 'c' via AD?_().
+ AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){return d=ALerpD3(a,b,AD3_(c));}
+ AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){return d=ALerpD4(a,b,AD4_(c));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){return d=max(a,b);} // opAMax*: d=max(a,b).
+ AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){return d=max(a,b);}
+ AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){return d=max(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){return d=min(a,b);} // opAMin*: d=min(a,b).
+ AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){return d=min(a,b);}
+ AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){return d=min(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){return d=a*b;} // opAMul*: d=a*b.
+ AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){return d=a*b;}
+ AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){return d=a*b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){return d=a*AD2_(b);} // opAMulOne*: scalar 'b' is passed through AD?_() first.
+ AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){return d=a*AD3_(b);}
+ AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){return d=a*AD4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opANegD2(outAD2 d,inAD2 a){return d=-a;} // opANeg*: d=-a.
+ AD3 opANegD3(outAD3 d,inAD3 a){return d=-a;}
+ AD4 opANegD4(outAD4 d,inAD4 a){return d=-a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AD2 opARcpD2(outAD2 d,inAD2 a){return d=ARcpD2(a);} // opARcp*: d=ARcpD?(a).
+ AD3 opARcpD3(outAD3 d,inAD3 a){return d=ARcpD3(a);}
+ AD4 opARcpD4(outAD4 d,inAD4 a){return d=ARcpD4(a);}
+ #endif
+//==============================================================================================================================
+ AF2 opAAbsF2(outAF2 d,inAF2 a){return d=abs(a);} // opAAbs*: d=abs(a).
+ AF3 opAAbsF3(outAF3 d,inAF3 a){return d=abs(a);}
+ AF4 opAAbsF4(outAF4 d,inAF4 a){return d=abs(a);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){return d=a+b;} // opAAdd*: d=a+b.
+ AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){return d=a+b;}
+ AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){return d=a+b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){return d=a+AF2_(b);} // opAAddOne*: scalar 'b' is passed through AF?_() first.
+ AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){return d=a+AF3_(b);}
+ AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){return d=a+AF4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opACpyF2(outAF2 d,inAF2 a){return d=a;} // opACpy*: d=a.
+ AF3 opACpyF3(outAF3 d,inAF3 a){return d=a;}
+ AF4 opACpyF4(outAF4 d,inAF4 a){return d=a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){return d=ALerpF2(a,b,c);} // opALerp*: d=ALerpF?(a,b,c).
+ AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){return d=ALerpF3(a,b,c);}
+ AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){return d=ALerpF4(a,b,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){return d=ALerpF2(a,b,AF2_(c));} // opALerpOne*: scalar 'c' via AF?_().
+ AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){return d=ALerpF3(a,b,AF3_(c));}
+ AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){return d=ALerpF4(a,b,AF4_(c));}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){return d=max(a,b);} // opAMax*: d=max(a,b).
+ AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){return d=max(a,b);}
+ AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){return d=max(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){return d=min(a,b);} // opAMin*: d=min(a,b).
+ AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){return d=min(a,b);}
+ AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){return d=min(a,b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){return d=a*b;} // opAMul*: d=a*b.
+ AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){return d=a*b;}
+ AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){return d=a*b;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){return d=a*AF2_(b);} // opAMulOne*: scalar 'b' is passed through AF?_() first.
+ AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){return d=a*AF3_(b);}
+ AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){return d=a*AF4_(b);}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opANegF2(outAF2 d,inAF2 a){return d=-a;} // opANeg*: d=-a.
+ AF3 opANegF3(outAF3 d,inAF3 a){return d=-a;}
+ AF4 opANegF4(outAF4 d,inAF4 a){return d=-a;}
+//------------------------------------------------------------------------------------------------------------------------------
+ AF2 opARcpF2(outAF2 d,inAF2 a){return d=ARcpF2(a);} // opARcp*: d=ARcpF?(a).
+ AF3 opARcpF3(outAF3 d,inAF3 a){return d=ARcpF3(a);}
+ AF4 opARcpF4(outAF4 d,inAF4 a){return d=ARcpF4(a);}
+#endif
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_a.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_fsr1.h
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_fsr1.h (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_fsr1.h (revision 28010)
@@ -0,0 +1,1199 @@
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//
+// AMD FidelityFX SUPER RESOLUTION [FSR 1] ::: SPATIAL SCALING & EXTRAS - v1.20210629
+//
+//
+//------------------------------------------------------------------------------------------------------------------------------
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//------------------------------------------------------------------------------------------------------------------------------
+// FidelityFX Super Resolution Sample
+//
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//------------------------------------------------------------------------------------------------------------------------------
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//------------------------------------------------------------------------------------------------------------------------------
+// ABOUT
+// =====
+// FSR is a collection of algorithms relating to generating a higher resolution image.
+// This specific header focuses on single-image non-temporal image scaling, and related tools.
+//
+// The core functions are EASU and RCAS:
+// [EASU] Edge Adaptive Spatial Upsampling ....... 1x to 4x area range spatial scaling, clamped adaptive elliptical filter.
+// [RCAS] Robust Contrast Adaptive Sharpening .... A non-scaling variation on CAS.
+// RCAS needs to be applied after EASU as a separate pass.
+//
+// Optional utility functions are:
+// [LFGA] Linear Film Grain Applicator ........... Tool to apply film grain after scaling.
+// [SRTM] Simple Reversible Tone-Mapper .......... Linear HDR {0 to FP16_MAX} to {0 to 1} and back.
+// [TEPD] Temporal Energy Preserving Dither ...... Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// See each individual sub-section for inline documentation.
+//------------------------------------------------------------------------------------------------------------------------------
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//------------------------------------------------------------------------------------------------------------------------------
+// FUNCTION PERMUTATIONS
+// =====================
+// *F() ..... Single item computation with 32-bit.
+// *H() ..... Single item computation with 16-bit, with packing (aka two 16-bit ops in parallel) when possible.
+// *Hx2() ... Processing two items in parallel with 16-bit, easier packing.
+// Not all interfaces in this file have a *Hx2() form.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// EASU provides a high quality spatial-only scaling at relatively low cost.
+// Meaning EASU is appropriate for laptops and other low-end GPUs.
+// Quality from 1x to 4x area scaling is good.
+//------------------------------------------------------------------------------------------------------------------------------
+// The scaler uses a modified fast approximation to the standard lanczos(size=2) kernel.
+// EASU runs in a single pass, so it applies a directionally and anisotropically adaptive radial lanczos.
+// This is also kept as simple as possible to have minimum runtime.
+//------------------------------------------------------------------------------------------------------------------------------
+// The lanczos filter has negative lobes, so by itself it will introduce ringing.
+// To remove all ringing, the algorithm uses the nearest 2x2 input texels as a neighborhood,
+// and limits output to the minimum and maximum of that neighborhood.
+//------------------------------------------------------------------------------------------------------------------------------
+// Input image requirements:
+//
+// Color needs to be encoded as 3 channel [red, green, blue] (e.g. XYZ not supported)
+// Each channel needs to be in the range [0, 1]
+// Any color primaries are supported
+// Display / tonemapping curve needs to be as if presenting to sRGB display or similar (e.g. Gamma 2.0)
+// There should be no banding in the input
+// There should be no high amplitude noise in the input
+// There should be no noise in the input that is not at input pixel granularity
+// For performance purposes, use 32bpp formats
+//------------------------------------------------------------------------------------------------------------------------------
+// Best to apply EASU at the end of the frame after tonemapping
+// but before film grain or composite of the UI.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example of including this header for D3D HLSL :
+//
+// #define A_GPU 1
+// #define A_HLSL 1
+// #define A_HALF 1
+// #include "ffx_a.h"
+// #define FSR_EASU_H 1
+// #define FSR_RCAS_H 1
+// //declare input callbacks
+// #include "ffx_fsr1.h"
+//
+// Example of including this header for Vulkan GLSL :
+//
+// #define A_GPU 1
+// #define A_GLSL 1
+// #define A_HALF 1
+// #include "ffx_a.h"
+// #define FSR_EASU_H 1
+// #define FSR_RCAS_H 1
+// //declare input callbacks
+// #include "ffx_fsr1.h"
+//
+// Example of including this header for Vulkan HLSL :
+//
+// #define A_GPU 1
+// #define A_HLSL 1
+// #define A_HLSL_6_2 1
+// #define A_NO_16_BIT_CAST 1
+// #define A_HALF 1
+// #include "ffx_a.h"
+// #define FSR_EASU_H 1
+// #define FSR_RCAS_H 1
+// //declare input callbacks
+// #include "ffx_fsr1.h"
+//
+// Example of declaring the required input callbacks for GLSL :
+// The callbacks need to gather4 for each color channel using the specified texture coordinate 'p'.
+// EASU uses gather4 to reduce position computation logic and for free Arrays of Structures to Structures of Arrays conversion.
+//
+// AH4 FsrEasuRH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,0));}
+// AH4 FsrEasuGH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,1));}
+// AH4 FsrEasuBH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,2));}
+// ...
+// The FsrEasuCon function needs to be called from the CPU or GPU to set up constants.
+// The difference in viewport and input image size is there to support Dynamic Resolution Scaling.
+// To use FsrEasuCon() on the CPU, define A_CPU before including ffx_a and ffx_fsr1.
+// Including a GPU example here, the 'con0' through 'con3' values would be stored out to a constant buffer.
+// AU4 con0,con1,con2,con3;
+// FsrEasuCon(con0,con1,con2,con3,
+// 1920.0,1080.0, // Viewport size (top left aligned) in the input image which is to be scaled.
+// 3840.0,2160.0, // The size of the input image.
+// 2560.0,1440.0); // The output resolution.
+//==============================================================================================================================
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CONSTANT SETUP
+//==============================================================================================================================
+// Fills the four EASU constant vectors con0..con3 (works on CPU or GPU).
+A_STATIC void FsrEasuCon(
+outAU4 con0,outAU4 con1,outAU4 con2,outAU4 con3,
+// Resolution of the rendered image that is being upscaled.
+AF1 inputViewportInPixelsX,AF1 inputViewportInPixelsY,
+// Resolution of the resource containing the input image (useful for dynamic resolution).
+AF1 inputSizeInPixelsX,AF1 inputSizeInPixelsY,
+// Display resolution which the input image gets upscaled to.
+AF1 outputSizeInPixelsX,AF1 outputSizeInPixelsY){
+ // Reciprocals reused by the constants below.
+ AF1 rcpOutX=ARcpF1(outputSizeInPixelsX);
+ AF1 rcpOutY=ARcpF1(outputSizeInPixelsY);
+ AF1 rcpInX=ARcpF1(inputSizeInPixelsX);
+ AF1 rcpInY=ARcpF1(inputSizeInPixelsY);
+ // con0: output integer position to a pixel position in viewport (xy is the scale, zw the bias).
+ con0[0]=AU1_AF1(inputViewportInPixelsX*rcpOutX);
+ con0[1]=AU1_AF1(inputViewportInPixelsY*rcpOutY);
+ con0[2]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsX*rcpOutX-AF1_(0.5));
+ con0[3]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsY*rcpOutY-AF1_(0.5));
+ // con1.xy: viewport pixel position to normalized image space, used to get the upper-left of the 'F' tap.
+ con1[0]=AU1_AF1(rcpInX);
+ con1[1]=AU1_AF1(rcpInY);
+ // Centers of gather4, first offset from upper-left of 'F'.
+ //      +---+---+
+ //      |   |   |
+ //      +--(0)--+
+ //      | b | c |
+ //  +---F---+---+---+
+ //  | e | f | g | h |
+ //  +--(1)--+--(2)--+
+ //  | i | j | k | l |
+ //  +---+---+---+---+
+ //      | n | o |
+ //      +--(3)--+
+ //      |   |   |
+ //      +---+---+
+ con1[2]=AU1_AF1(AF1_( 1.0)*rcpInX);
+ con1[3]=AU1_AF1(AF1_(-1.0)*rcpInY);
+ // The remaining centers are offsets from (0) instead of 'F'.
+ con2[0]=AU1_AF1(AF1_(-1.0)*rcpInX);
+ con2[1]=AU1_AF1(AF1_( 2.0)*rcpInY);
+ con2[2]=AU1_AF1(AF1_( 1.0)*rcpInX);
+ con2[3]=AU1_AF1(AF1_( 2.0)*rcpInY);
+ con3[0]=AU1_AF1(AF1_( 0.0)*rcpInX);
+ con3[1]=AU1_AF1(AF1_( 4.0)*rcpInY);
+ // The last two lanes of con3 are set to zero.
+ con3[2]=0;
+ con3[3]=0;}
+
+// Same as FsrEasuCon(), for an input image that sits at an offset inside the input image resource.
+A_STATIC void FsrEasuConOffset(
+    outAU4 con0,outAU4 con1,outAU4 con2,outAU4 con3,
+    // Resolution of the rendered image that is being upscaled.
+    AF1 inputViewportInPixelsX,AF1 inputViewportInPixelsY,
+    // Resolution of the resource containing the input image (useful for dynamic resolution).
+    AF1 inputSizeInPixelsX,AF1 inputSizeInPixelsY,
+    // Display resolution which the input image gets upscaled to.
+    AF1 outputSizeInPixelsX,AF1 outputSizeInPixelsY,
+    // Offset of the input image inside the resource containing it (useful for dynamic resolution).
+    AF1 inputOffsetInPixelsX,AF1 inputOffsetInPixelsY){
+    // Same bias expression FsrEasuCon() stores in con0[2] and con0[3]; depends on the inputs only.
+    AF1 biasX=AF1_(0.5)*inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5);
+    AF1 biasY=AF1_(0.5)*inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5);
+    // Fill in every constant as for the offset-free case.
+    FsrEasuCon(con0,con1,con2,con3,
+        inputViewportInPixelsX,inputViewportInPixelsY,
+        inputSizeInPixelsX,inputSizeInPixelsY,
+        outputSizeInPixelsX,outputSizeInPixelsY);
+    // Replace the bias with one that has the pixel offset added in.
+    con0[2]=AU1_AF1(biasX+inputOffsetInPixelsX);
+    con0[3]=AU1_AF1(biasY+inputOffsetInPixelsY);}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(FSR_EASU_F)
+ // Input callback prototypes, need to be implemented by calling shader
+ AF4 FsrEasuRF(AF2 p);
+ AF4 FsrEasuGF(AF2 p);
+ AF4 FsrEasuBF(AF2 p);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Accumulates a single filter tap of the scaler into the running color and weight sums.
+ void FsrEasuTapF(
+ inout AF3 aC, // Accumulated color, with negative lobe.
+ inout AF1 aW, // Accumulated weight.
+ AF2 off, // Pixel offset from resolve position to tap.
+ AF2 dir, // Gradient direction.
+ AF2 len, // Length.
+ AF1 lob, // Negative lobe strength.
+ AF1 clp, // Clipping point.
+ AF3 c){ // Tap color.
+  // Offset rotated by the direction.
+  AF2 rot=AF2(
+   (off.x*( dir.x))+(off.y*dir.y),
+   (off.x*(-dir.y))+(off.y*dir.x));
+  // Anisotropy: each rotated axis is scaled by 'len'.
+  rot*=len;
+  // Squared distance, limited to the window since at a corner 2 taps can easily be outside.
+  AF1 distSq=min(rot.x*rot.x+rot.y*rot.y,clp);
+  // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+  //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+  //  |_______________________________________|   |_______________|
+  //                   base                             window
+  // The general form of the 'base' is,
+  //  (a*(b*x^2-1)^2-(a-1))
+  // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+  // polyB and polyA are the two polynomial factors; each is squared before they are multiplied.
+  AF1 polyB=AF1_(2.0/5.0)*distSq+AF1_(-1.0);
+  AF1 polyA=lob*distSq+AF1_(-1.0);
+  polyB*=polyB;
+  polyA*=polyA;
+  polyB=AF1_(25.0/16.0)*polyB+AF1_(-(25.0/16.0-1.0));
+  AF1 tapWeight=polyB*polyA;
+  // Add the weighted tap to the running sums.
+  aC+=c*tapWeight;
+  aW+=tapWeight;}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Adds the contribution of one bilinear corner to the accumulated direction and length.
+ void FsrEasuSetF(
+ inout AF2 dir, // Accumulated direction.
+ inout AF1 len, // Accumulated length.
+ AF2 pp, // Position used to form the bilinear weight.
+ AP1 biS,AP1 biT,AP1 biU,AP1 biV, // Corner selectors {s t / u v}.
+ AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){ // Values on the '+' pattern {a / b c d / e}.
+  // Compute the bilinear weight of the selected corner.
+  // Branches factor out as the predicates are compile-time immediates.
+  //  s t
+  //  u v
+  AF1 fx=pp.x;
+  AF1 fy=pp.y;
+  AF1 gx=AF1_(1.0)-fx;
+  AF1 gy=AF1_(1.0)-fy;
+  // A later selector overrides an earlier one if more than one is set.
+  AF1 w=AF1_(0.0);
+  if(biS)w=gx*gy;
+  if(biT)w=fx*gy;
+  if(biU)w=gx*fy;
+  if(biV)w=fx*fy;
+  // Direction is the '+' diff.
+  //    a
+  //  b c d
+  //    e
+  // Then takes magnitude from abs average of both sides of 'c'.
+  // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+  // Horizontal axis (b, c, d): normalize by the larger one-sided difference, saturate, then square.
+  AF1 gradX=lD-lB;
+  AF1 rcpX=APrxLoRcpF1(max(abs(lD-lC),abs(lC-lB)));
+  AF1 edgeX=ASatF1(abs(gradX)*rcpX);
+  dir.x+=gradX*w;
+  len+=(edgeX*edgeX)*w;
+  // Vertical axis (a, c, e): same steps.
+  AF1 gradY=lE-lA;
+  AF1 rcpY=APrxLoRcpF1(max(abs(lE-lC),abs(lC-lA)));
+  AF1 edgeY=ASatF1(abs(gradY)*rcpY);
+  dir.y+=gradY*w;
+  len+=(edgeY*edgeY)*w;}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrEasuF(
+ out AF3 pix,
+ AU2 ip, // Integer pixel position in output.
+ AU4 con0, // Constants generated by FsrEasuCon().
+ AU4 con1,
+ AU4 con2,
+ AU4 con3){
+//------------------------------------------------------------------------------------------------------------------------------
+ // Get position of 'f'.
+ AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw);
+ AF2 fp=floor(pp);
+ pp-=fp;
+//------------------------------------------------------------------------------------------------------------------------------
+ // 12-tap kernel.
+ // b c
+ // e f g h
+ // i j k l
+ // n o
+ // Gather 4 ordering.
+ // a b
+ // r g
+ // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+ // a b <- unused (z)
+ // r g
+ // a b a b
+ // r g r g
+ // a b
+ // r g <- unused (z)
+ // Allowing dead-code removal to remove the 'z's.
+ AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
+ // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+ AF2 p1=p0+AF2_AU2(con2.xy);
+ AF2 p2=p0+AF2_AU2(con2.zw);
+ AF2 p3=p0+AF2_AU2(con3.xy);
+ AF4 bczzR=FsrEasuRF(p0);
+ AF4 bczzG=FsrEasuGF(p0);
+ AF4 bczzB=FsrEasuBF(p0);
+ AF4 ijfeR=FsrEasuRF(p1);
+ AF4 ijfeG=FsrEasuGF(p1);
+ AF4 ijfeB=FsrEasuBF(p1);
+ AF4 klhgR=FsrEasuRF(p2);
+ AF4 klhgG=FsrEasuGF(p2);
+ AF4 klhgB=FsrEasuBF(p2);
+ AF4 zzonR=FsrEasuRF(p3);
+ AF4 zzonG=FsrEasuGF(p3);
+ AF4 zzonB=FsrEasuBF(p3);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+ AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG);
+ AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG);
+ AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG);
+ AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG);
+ // Rename.
+ AF1 bL=bczzL.x;
+ AF1 cL=bczzL.y;
+ AF1 iL=ijfeL.x;
+ AF1 jL=ijfeL.y;
+ AF1 fL=ijfeL.z;
+ AF1 eL=ijfeL.w;
+ AF1 kL=klhgL.x;
+ AF1 lL=klhgL.y;
+ AF1 hL=klhgL.z;
+ AF1 gL=klhgL.w;
+ AF1 oL=zzonL.z;
+ AF1 nL=zzonL.w;
+ // Accumulate for bilinear interpolation.
+ AF2 dir=AF2_(0.0);
+ AF1 len=AF1_(0.0);
+ FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL);
+ FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL);
+ FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL);
+ FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Normalize with approximation, and cleanup close to zero.
+ AF2 dir2=dir*dir;
+ AF1 dirR=dir2.x+dir2.y;
+ AP1 zro=dirR w = -m/(n+e+w+s)
+// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+// 0.25
+// 0.25 -1 0.25
+// 0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+// GLSL example for the required callbacks :
+//
+// AH4 FsrRcasLoadH(ASW2 p){return AH4(imageLoad(imgSrc,ASU2(p)));}
+// void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b)
+// {
+// //do any simple input color conversions here or leave empty if none needed
+// }
+//
+// FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+// Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+//
+// AU4 con;
+// FsrRcasCon(con,
+// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+// #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+// #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CONSTANT SETUP
+//==============================================================================================================================
+// Fills 'con' with the constant values RCAS requires (works on CPU or GPU).
+A_STATIC void FsrRcasCon(
+outAU4 con,
+// Sharpness in stops: 0.0 is maximum, N>0 reduces sharpness by N stops (halvings).
+AF1 sharpness){
+ // Stops to linear value.
+ AF1 linearSharp=AExp2F1(-sharpness);
+ varAF2(sharpPair)=initAF2(linearSharp,linearSharp);
+ // [0]: linear value via AU1_AF1(); [1]: the pair {value,value} via AU1_AH2_AF2(); [2],[3]: zero.
+ con[0]=AU1_AF1(linearSharp);
+ con[1]=AU1_AH2_AF2(sharpPair);
+ con[2]=con[3]=0;}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(FSR_RCAS_F)
+ // Input callback prototypes that need to be implemented by calling shader
+ AF4 FsrRcasLoadF(ASU2 p);
+ void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasF(
+ out AF1 pixR, // Output values, non-vector so porting between FsrRcasF() and FsrRcasH() is easy.
+ out AF1 pixG,
+ out AF1 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out AF1 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by RcasSetup(); only con.x is read here, reinterpreted as a float via AF1_AU1().
+ // Algorithm uses minimal 3x3 pixel neighborhood, of which only the 5-tap cross below is loaded.
+ // b
+ // d e f
+ // h
+ ASU2 sp=ASU2(ip);
+ AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb;
+ AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ AF4 ee=FsrRcasLoadF(sp);
+ AF3 e=ee.rgb;pixA=ee.a; // Alpha of the center tap is passed through unfiltered.
+ #else
+ AF3 e=FsrRcasLoadF(sp).rgb;
+ #endif
+ AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb;
+ AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb;
+ // Rename (32-bit) or regroup (16-bit).
+ AF1 bR=b.r;
+ AF1 bG=b.g;
+ AF1 bB=b.b;
+ AF1 dR=d.r;
+ AF1 dG=d.g;
+ AF1 dB=d.b;
+ AF1 eR=e.r;
+ AF1 eG=e.g;
+ AF1 eB=e.b;
+ AF1 fR=f.r;
+ AF1 fG=f.g;
+ AF1 fB=f.b;
+ AF1 hR=h.r;
+ AF1 hG=h.g;
+ AF1 hB=h.b;
+ // Run optional input transform (callback implemented by the calling shader) on each of the 5 taps.
+ FsrRcasInputF(bR,bG,bB);
+ FsrRcasInputF(dR,dG,dB);
+ FsrRcasInputF(eR,eG,eB);
+ FsrRcasInputF(fR,fG,fB);
+ FsrRcasInputF(hR,hG,hB);
+ // Luma times 2, computed as 0.5*R + G + 0.5*B.
+ AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG);
+ AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG);
+ AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG);
+ AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG);
+ AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG);
+ // Noise detection: mean ring luma minus center luma, relative to the luma range of all 5 taps.
+ AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL;
+ nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL)));
+ nz=AF1_(-0.5)*nz+AF1_(1.0); // Turn the noise estimate into a lobe multiplier: 1 - 0.5*nz.
+ // Per-channel min and max of the 4-tap ring (b, d, f, h), center excluded.
+ AF1 mn4R=min(AMin3F1(bR,dR,fR),hR);
+ AF1 mn4G=min(AMin3F1(bG,dG,fG),hG);
+ AF1 mn4B=min(AMin3F1(bB,dB,fB),hB);
+ AF1 mx4R=max(AMax3F1(bR,dR,fR),hR);
+ AF1 mx4G=max(AMax3F1(bG,dG,fG),hG);
+ AF1 mx4B=max(AMax3F1(bB,dB,fB),hB);
+ // Immediate constants for peak range.
+ AF2 peakC=AF2(1.0,-1.0*4.0);
+ // Limiters, these need to be high precision RCPs.
+ AF1 hitMinR=min(mn4R,eR)*ARcpF1(AF1_(4.0)*mx4R);
+ AF1 hitMinG=min(mn4G,eG)*ARcpF1(AF1_(4.0)*mx4G);
+ AF1 hitMinB=min(mn4B,eB)*ARcpF1(AF1_(4.0)*mx4B);
+ AF1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
+ AF1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
+ AF1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
+ AF1 lobeR=max(-hitMinR,hitMaxR);
+ AF1 lobeG=max(-hitMinG,hitMaxG);
+ AF1 lobeB=max(-hitMinB,hitMaxB);
+ AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x); // Clamp to {-FSR_RCAS_LIMIT to 0}, then scale by the float stored in con.x.
+ // Apply noise removal (only when FSR_RCAS_DENOISE is defined; nz is computed either way).
+ #ifdef FSR_RCAS_DENOISE
+ lobe*=nz;
+ #endif
+ // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+ AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0));
+ pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; // Ring taps weighted by lobe, center weighted by 1, normalized by rcpL.
+ pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+ pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+ return;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H)
+ // Input callback prototypes that need to be implemented by calling shader
+ AH4 FsrRcasLoadH(ASW2 p);
+ void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasH(
+ out AH1 pixR, // Output values, non-vector so porting between FsrRcasF() and FsrRcasH() is easy.
+ out AH1 pixG,
+ out AH1 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out AH1 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by RcasSetup(); only con.y is read here, unpacked to halves via AH2_AU1().
+ // Sharpening algorithm uses minimal 3x3 pixel neighborhood, of which only the 5-tap cross below is loaded.
+ // b
+ // d e f
+ // h
+ ASW2 sp=ASW2(ip);
+ AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb;
+ AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ AH4 ee=FsrRcasLoadH(sp);
+ AH3 e=ee.rgb;pixA=ee.a; // Alpha of the center tap is passed through unfiltered.
+ #else
+ AH3 e=FsrRcasLoadH(sp).rgb;
+ #endif
+ AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb;
+ AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb;
+ // Rename (32-bit) or regroup (16-bit).
+ AH1 bR=b.r;
+ AH1 bG=b.g;
+ AH1 bB=b.b;
+ AH1 dR=d.r;
+ AH1 dG=d.g;
+ AH1 dB=d.b;
+ AH1 eR=e.r;
+ AH1 eG=e.g;
+ AH1 eB=e.b;
+ AH1 fR=f.r;
+ AH1 fG=f.g;
+ AH1 fB=f.b;
+ AH1 hR=h.r;
+ AH1 hG=h.g;
+ AH1 hB=h.b;
+ // Run optional input transform (callback implemented by the calling shader) on each of the 5 taps.
+ FsrRcasInputH(bR,bG,bB);
+ FsrRcasInputH(dR,dG,dB);
+ FsrRcasInputH(eR,eG,eB);
+ FsrRcasInputH(fR,fG,fB);
+ FsrRcasInputH(hR,hG,hB);
+ // Luma times 2, computed as 0.5*R + G + 0.5*B.
+ AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG);
+ AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG);
+ AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG);
+ AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG);
+ AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG);
+ // Noise detection: mean ring luma minus center luma, relative to the luma range of all 5 taps.
+ AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL;
+ nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL)));
+ nz=AH1_(-0.5)*nz+AH1_(1.0); // Turn the noise estimate into a lobe multiplier: 1 - 0.5*nz.
+ // Per-channel min and max of the 4-tap ring (b, d, f, h), center excluded.
+ AH1 mn4R=min(AMin3H1(bR,dR,fR),hR);
+ AH1 mn4G=min(AMin3H1(bG,dG,fG),hG);
+ AH1 mn4B=min(AMin3H1(bB,dB,fB),hB);
+ AH1 mx4R=max(AMax3H1(bR,dR,fR),hR);
+ AH1 mx4G=max(AMax3H1(bG,dG,fG),hG);
+ AH1 mx4B=max(AMax3H1(bB,dB,fB),hB);
+ // Immediate constants for peak range.
+ AH2 peakC=AH2(1.0,-1.0*4.0);
+ // Limiters, these need to be high precision RCPs.
+ AH1 hitMinR=min(mn4R,eR)*ARcpH1(AH1_(4.0)*mx4R);
+ AH1 hitMinG=min(mn4G,eG)*ARcpH1(AH1_(4.0)*mx4G);
+ AH1 hitMinB=min(mn4B,eB)*ARcpH1(AH1_(4.0)*mx4B);
+ AH1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH1(AH1_(4.0)*mn4R+peakC.y);
+ AH1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH1(AH1_(4.0)*mn4G+peakC.y);
+ AH1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH1(AH1_(4.0)*mn4B+peakC.y);
+ AH1 lobeR=max(-hitMinR,hitMaxR);
+ AH1 lobeG=max(-hitMinG,hitMaxG);
+ AH1 lobeB=max(-hitMinB,hitMaxB);
+ AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x; // Clamp to {-FSR_RCAS_LIMIT to 0}, then scale by the x half unpacked from con.y.
+ // Apply noise removal (only when FSR_RCAS_DENOISE is defined; nz is computed either way).
+ #ifdef FSR_RCAS_DENOISE
+ lobe*=nz;
+ #endif
+ // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+ AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0));
+ pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; // Ring taps weighted by lobe, center weighted by 1, normalized by rcpL.
+ pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+ pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2)
+ // Input callback prototypes that need to be implemented by the calling shader
+ AH4 FsrRcasLoadHx2(ASW2 p);
+ void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Converts packed Structures of Arrays to Arrays of Structures for store: pix0 gets the .x lanes, pix1 the .y lanes.
+ void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){
+ #ifdef A_HLSL
+ // Invoke a slower path for DX only, since it won't allow uninitialized values.
+ pix0.a=pix1.a=0.0; // Alpha is written only on this path; otherwise it is left unassigned.
+ #endif
+ pix0.rgb=AH3(pixR.x,pixG.x,pixB.x);
+ pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasHx2(
+ // Output values are for 2 8x8 tiles in a 16x8 region.
+ // pix.x = left 8x8 tile
+ // pix.y = right 8x8 tile
+ // This enables later processing to easily be packed as well.
+ out AH2 pixR,
+ out AH2 pixG,
+ out AH2 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out AH2 pixA,
+ #endif
+ AU2 ip, // Integer pixel position in output.
+ AU4 con){ // Constant generated by RcasSetup(); only con.y is read here, unpacked to halves via AH2_AU1().
+ // This non-scaling pass uses a minimal 3x3 pixel neighborhood (5-tap cross), loaded for 'ip' and for 'ip+{8,0}'.
+ ASW2 sp0=ASW2(ip);
+ AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb;
+ AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ AH4 ee0=FsrRcasLoadHx2(sp0);
+ AH3 e0=ee0.rgb;pixA.r=ee0.a; // Alpha of the first center tap is passed through unfiltered.
+ #else
+ AH3 e0=FsrRcasLoadHx2(sp0).rgb;
+ #endif
+ AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb;
+ AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb;
+ ASW2 sp1=sp0+ASW2(8,0); // Second pixel sits 8 texels to the right of the first.
+ AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb;
+ AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ AH4 ee1=FsrRcasLoadHx2(sp1);
+ AH3 e1=ee1.rgb;pixA.g=ee1.a; // Alpha of the second center tap is passed through unfiltered.
+ #else
+ AH3 e1=FsrRcasLoadHx2(sp1).rgb;
+ #endif
+ AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb;
+ AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb;
+ // Arrays of Structures to Structures of Arrays conversion (.x = first pixel, .y = second pixel).
+ AH2 bR=AH2(b0.r,b1.r);
+ AH2 bG=AH2(b0.g,b1.g);
+ AH2 bB=AH2(b0.b,b1.b);
+ AH2 dR=AH2(d0.r,d1.r);
+ AH2 dG=AH2(d0.g,d1.g);
+ AH2 dB=AH2(d0.b,d1.b);
+ AH2 eR=AH2(e0.r,e1.r);
+ AH2 eG=AH2(e0.g,e1.g);
+ AH2 eB=AH2(e0.b,e1.b);
+ AH2 fR=AH2(f0.r,f1.r);
+ AH2 fG=AH2(f0.g,f1.g);
+ AH2 fB=AH2(f0.b,f1.b);
+ AH2 hR=AH2(h0.r,h1.r);
+ AH2 hG=AH2(h0.g,h1.g);
+ AH2 hB=AH2(h0.b,h1.b);
+ // Run optional input transform (callback implemented by the calling shader) on each packed tap.
+ FsrRcasInputHx2(bR,bG,bB);
+ FsrRcasInputHx2(dR,dG,dB);
+ FsrRcasInputHx2(eR,eG,eB);
+ FsrRcasInputHx2(fR,fG,fB);
+ FsrRcasInputHx2(hR,hG,hB);
+ // Luma times 2, computed as 0.5*R + G + 0.5*B.
+ AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG);
+ AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG);
+ AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG);
+ AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG);
+ AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG);
+ // Noise detection: mean ring luma minus center luma, relative to the luma range of all 5 taps.
+ AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL;
+ nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL)));
+ nz=AH2_(-0.5)*nz+AH2_(1.0); // Turn the noise estimate into a lobe multiplier: 1 - 0.5*nz.
+ // Per-channel min and max of the 4-tap ring (b, d, f, h), center excluded.
+ AH2 mn4R=min(AMin3H2(bR,dR,fR),hR);
+ AH2 mn4G=min(AMin3H2(bG,dG,fG),hG);
+ AH2 mn4B=min(AMin3H2(bB,dB,fB),hB);
+ AH2 mx4R=max(AMax3H2(bR,dR,fR),hR);
+ AH2 mx4G=max(AMax3H2(bG,dG,fG),hG);
+ AH2 mx4B=max(AMax3H2(bB,dB,fB),hB);
+ // Immediate constants for peak range.
+ AH2 peakC=AH2(1.0,-1.0*4.0);
+ // Limiters, these need to be high precision RCPs.
+ AH2 hitMinR=min(mn4R,eR)*ARcpH2(AH2_(4.0)*mx4R);
+ AH2 hitMinG=min(mn4G,eG)*ARcpH2(AH2_(4.0)*mx4G);
+ AH2 hitMinB=min(mn4B,eB)*ARcpH2(AH2_(4.0)*mx4B);
+ AH2 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH2(AH2_(4.0)*mn4R+peakC.y);
+ AH2 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH2(AH2_(4.0)*mn4G+peakC.y);
+ AH2 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH2(AH2_(4.0)*mn4B+peakC.y);
+ AH2 lobeR=max(-hitMinR,hitMaxR);
+ AH2 lobeG=max(-hitMinG,hitMaxG);
+ AH2 lobeB=max(-hitMinB,hitMaxB);
+ AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x); // Clamp to {-FSR_RCAS_LIMIT to 0}, then scale both lanes by the x half unpacked from con.y.
+ // Apply noise removal (only when FSR_RCAS_DENOISE is defined; nz is computed either way).
+ #ifdef FSR_RCAS_DENOISE
+ lobe*=nz;
+ #endif
+ // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+ AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0));
+ pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; // Ring taps weighted by lobe, center weighted by 1, normalized by rcpL.
+ pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+ pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+// FsrLfga*(
+// color, // In/out linear colorspace color {0 to 1} ranged.
+// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+// amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,AF3_(grain),amount)'
+//==============================================================================================================================
+#if defined(A_GPU)
+ // Adds grain 't' scaled by amount 'a' and by min(1-c,c), so the maximum grain is the minimum distance to the signal limit.
+ void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);}
+#endif
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)
+ // Half precision version (slower); same formula as FsrLfgaF() on AH3 operands.
+ void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // Packed half precision version (faster); color and grain are passed as separate packed R, G and B pairs.
+ void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){
+ cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);} // Same per-channel formula as FsrLfgaF().
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
+// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(A_GPU)
+ void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));} // Scales all channels by the reciprocal of (max(r,g,b)+1).
+ // The extra max keeps the divisor (1-max(r,g,b)) at 1/32768 or more, which solves the c=1.0 case (which is a /0).
+ void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));}
+#endif
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)
+ void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));} // Half precision: scales all channels by the reciprocal of (max(r,g,b)+1).
+ void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));} // Half precision inverse; the divisor is kept at 1/32768 or more.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ // Packed version: each lane of cR/cG/cB is one pixel.
+ AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;} // One shared scale per lane: reciprocal of (max(r,g,b)+1).
+ void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ // Packed inverse: each lane of cR/cG/cB is one pixel.
+ AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;} // The divisor is kept at 1/32768 or more per lane.
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// Gamma 2.0 is used so that the conversion back to linear is just to square the color.
+// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively.
+// Given good non-biased temporal blue noise as dither input,
+// the output dither will temporally conserve energy.
+// This is done by choosing the linear nearest step point instead of perceptual nearest.
+// See code below for details.
+//------------------------------------------------------------------------------------------------------------------------------
+// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION
+// ===============================================
+// - Output is 'uint(floor(saturate(n)*255.0+0.5))'.
+// - Thus rounding is to nearest.
+// - NaN gets converted to zero.
+// - INF is clamped to {0.0 to 1.0}.
+//==============================================================================================================================
+#if defined(A_GPU)
+ // Hand tuned integer position to dither value, with more values than simple checkerboard.
+ // Only 32-bit has enough precision for this computation.
+ // Output is {0 to <1}.
+ AF1 FsrTepdDitF(AU2 p,AU1 f){
+ AF1 x=AF1_(p.x+f); // 'f' shifts the x position; presumably a frame counter - confirm against the caller.
+ AF1 y=AF1_(p.y);
+ // The 1.61803 golden ratio.
+ AF1 a=AF1_((1.0+sqrt(5.0))/2.0);
+ // Number designed to provide a good visual pattern.
+ AF1 b=AF1_(1.0/3.69);
+ x=x*a+(y*b);
+ return AFractF1(x);}
+//------------------------------------------------------------------------------------------------------------------------------
+ // This version is 8-bit gamma 2.0.
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC8F(inout AF3 c,AF1 dit){
+ AF3 n=sqrt(c); // Gamma 2.0 encode.
+ n=floor(n*AF3_(255.0))*AF3_(1.0/255.0); // Snap down to a multiple of 1/255.
+ AF3 a=n*n; // Linear value of the lower step.
+ AF3 b=n+AF3_(1.0/255.0);b=b*b; // Linear value of the next step up.
+ // Ratio of 'a' to 'b' required to produce 'c'.
+ // APrxLoRcpF1() won't work here (at least for very high dynamic ranges).
+ // APrxMedRcpF1() is an IADD,FMA,MUL.
+ AF3 r=(c-b)*APrxMedRcpF3(a-b);
+ // Use the ratio as a cutoff to choose 'a' or 'b'.
+ // AGtZeroF1() is a MUL.
+ c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));} // Adds one 1/255 step where dit-r is positive (assuming AGtZeroF3 yields 1 for positive input, else 0).
+//------------------------------------------------------------------------------------------------------------------------------
+ // This version is 10-bit gamma 2.0.
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC10F(inout AF3 c,AF1 dit){
+ AF3 n=sqrt(c); // Gamma 2.0 encode.
+ n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0); // Snap down to a multiple of 1/1023.
+ AF3 a=n*n; // Linear value of the lower step.
+ AF3 b=n+AF3_(1.0/1023.0);b=b*b; // Linear value of the next step up.
+ AF3 r=(c-b)*APrxMedRcpF3(a-b); // Ratio of 'a' to 'b' required to produce 'c'.
+ c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));} // Adds one 1/1023 step where dit-r is positive (assuming AGtZeroF3 yields 1 for positive input, else 0).
+#endif
+//==============================================================================================================================
+#if defined(A_GPU)&&defined(A_HALF)
+ AH1 FsrTepdDitH(AU2 p,AU1 f){ // Same computation as FsrTepdDitF(), done in 32-bit and converted to half on return.
+ AF1 x=AF1_(p.x+f);
+ AF1 y=AF1_(p.y);
+ AF1 a=AF1_((1.0+sqrt(5.0))/2.0); // The golden ratio.
+ AF1 b=AF1_(1.0/3.69);
+ x=x*a+(y*b);
+ return AH1(AFractF1(x));}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8H(inout AH3 c,AH1 dit){ // Half precision counterpart of FsrTepdC8F() (8-bit gamma 2.0).
+ AH3 n=sqrt(c); // Gamma 2.0 encode.
+ n=floor(n*AH3_(255.0))*AH3_(1.0/255.0); // Snap down to a multiple of 1/255.
+ AH3 a=n*n; // Linear value of the lower step.
+ AH3 b=n+AH3_(1.0/255.0);b=b*b; // Linear value of the next step up.
+ AH3 r=(c-b)*APrxMedRcpH3(a-b); // Ratio of 'a' to 'b' required to produce 'c'.
+ c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));} // Adds one 1/255 step where dit-r is positive (assuming AGtZeroH3 yields 1 for positive input, else 0).
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10H(inout AH3 c,AH1 dit){ // Half precision counterpart of FsrTepdC10F() (10-bit gamma 2.0).
+ AH3 n=sqrt(c); // Gamma 2.0 encode.
+ n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0); // Snap down to a multiple of 1/1023.
+ AH3 a=n*n; // Linear value of the lower step.
+ AH3 b=n+AH3_(1.0/1023.0);b=b*b; // Linear value of the next step up.
+ AH3 r=(c-b)*APrxMedRcpH3(a-b); // Ratio of 'a' to 'b' required to produce 'c'.
+ c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));} // Adds one 1/1023 step where dit-r is positive (assuming AGtZeroH3 yields 1 for positive input, else 0).
+//==============================================================================================================================
+ // This computes dither for positions 'p' and 'p+{8,0}', returned in .x and .y respectively.
+ AH2 FsrTepdDitHx2(AU2 p,AU1 f){
+ AF2 x;
+ x.x=AF1_(p.x+f);
+ x.y=x.x+AF1_(8.0); // Second position is 8 to the right of the first.
+ AF1 y=AF1_(p.y);
+ AF1 a=AF1_((1.0+sqrt(5.0))/2.0); // The golden ratio.
+ AF1 b=AF1_(1.0/3.69);
+ x=x*AF2_(a)+AF2_(y*b);
+ return AH2(AFractF2(x));}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ // Packed counterpart of FsrTepdC8H(): each lane is one pixel.
+ AH2 nR=sqrt(cR); // Gamma 2.0 encode.
+ AH2 nG=sqrt(cG);
+ AH2 nB=sqrt(cB);
+ nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0); // Snap down to a multiple of 1/255.
+ nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0);
+ nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0);
+ AH2 aR=nR*nR; // Linear value of the lower step.
+ AH2 aG=nG*nG;
+ AH2 aB=nB*nB;
+ AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR; // Linear value of the next step up.
+ AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG;
+ AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB;
+ AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); // Ratio of 'a' to 'b' required to produce 'c'.
+ AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
+ AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
+ cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0)); // Adds one 1/255 step where dit-r is positive (assuming AGtZeroH2 yields 1 for positive input, else 0).
+ cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0));
+ cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));}
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ // Packed counterpart of FsrTepdC10H(): each lane is one pixel.
+ AH2 nR=sqrt(cR); // Gamma 2.0 encode.
+ AH2 nG=sqrt(cG);
+ AH2 nB=sqrt(cB);
+ nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0); // Snap down to a multiple of 1/1023.
+ nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0);
+ nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0);
+ AH2 aR=nR*nR; // Linear value of the lower step.
+ AH2 aG=nG*nG;
+ AH2 aB=nB*nB;
+ AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR; // Linear value of the next step up.
+ AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG;
+ AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB;
+ AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); // Ratio of 'a' to 'b' required to produce 'c'.
+ AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG);
+ AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB);
+ cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0)); // Adds one 1/1023 step where dit-r is positive (assuming AGtZeroH2 yields 1 for positive input, else 0).
+ cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0));
+ cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));}
+#endif
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/ffx_fsr1.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.fs
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.fs (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.fs (revision 28010)
@@ -0,0 +1,19 @@
+#version 120
+
+#include "common/fragment.h"
+#include "common/stage.h"
+
+BEGIN_DRAW_TEXTURES
+ TEXTURE_2D(0, inTex)
+END_DRAW_TEXTURES
+
+BEGIN_DRAW_UNIFORMS
+ UNIFORM(vec4, screenSize)
+END_DRAW_UNIFORMS
+
+VERTEX_OUTPUT(0, vec2, v_tex);
+
+void main() // Bilinear upscale pass: outputs one sample of inTex per fragment.
+{
+ OUTPUT_FRAGMENT_SINGLE_COLOR(SAMPLE_2D(GET_DRAW_TEXTURE_2D(inTex), v_tex)); // Sampled at the interpolated v_tex; the screenSize uniform is not used here.
+}
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.fs
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.xml (revision 28010)
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_bilinear.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.fs
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.fs (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.fs (revision 28010)
@@ -0,0 +1,19 @@
+#version 130
+
+#include "common/fragment.h"
+#include "common/stage.h"
+
+BEGIN_DRAW_TEXTURES
+ TEXTURE_2D(0, inTex)
+END_DRAW_TEXTURES
+
+BEGIN_DRAW_UNIFORMS
+ UNIFORM(vec4, screenSize)
+END_DRAW_UNIFORMS
+
+VERTEX_OUTPUT(0, vec2, v_tex);
+
+void main() // Nearest upscale pass: outputs one unfiltered texel of inTex per fragment.
+{
+ OUTPUT_FRAGMENT_SINGLE_COLOR(texelFetch(GET_DRAW_TEXTURE_2D(inTex), ivec2(v_tex * screenSize.xy), 0)); // Mip 0 texel at v_tex*screenSize.xy truncated to integers. NOTE(review): assumes screenSize.xy is the size of inTex in texels - confirm.
+}
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.fs
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.xml
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.xml (nonexistent)
+++ ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.xml (revision 28010)
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
Property changes on: ps/trunk/binaries/data/mods/mod/shaders/glsl/upscale_nearest.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: ps/trunk/binaries/data/mods/mod/shaders/program.rng
===================================================================
--- ps/trunk/binaries/data/mods/mod/shaders/program.rng (revision 28009)
+++ ps/trunk/binaries/data/mods/mod/shaders/program.rng (revision 28010)
@@ -1,115 +1,124 @@
arb
glsl
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
float
vec2
vec3
vec4
mat2
mat3
mat4
sampler2D
sampler2DShadow
samplerCube
pos
normal
color
uv0
uv1
uv2
uv3
uv4
uv5
uv6
uv7
Index: ps/trunk/binaries/data/mods/public/gui/options/options.js
===================================================================
--- ps/trunk/binaries/data/mods/public/gui/options/options.js (revision 28009)
+++ ps/trunk/binaries/data/mods/public/gui/options/options.js (revision 28010)
@@ -1,461 +1,465 @@
/**
* Translated JSON file contents.
*/
var g_Options;
/**
* Names of config keys that have changed, value returned when closing the page.
*/
var g_ChangedKeys;
/**
* Vertical size of a tab button.
*/
var g_TabButtonHeight = 30;
/**
* Vertical space between two tab buttons.
*/
var g_TabButtonDist = 5;
/**
* Vertical distance between the top of the page and the first option.
*/
var g_OptionControlOffset = 5;
/**
* Vertical size of each option control.
*/
var g_OptionControlHeight = 26;
/**
* Vertical distance between two consecutive options.
*/
var g_OptionControlDist = 2;
/**
* Horizontal indentation to distinguish options that depend on another option.
*/
var g_DependentLabelIndentation = 25;
/**
* Color used to indicate that the string entered by the player isn't a sane color.
*/
var g_InsaneColor = "255 0 255";
/**
* Defines the parsing of config strings and GUI control interaction for the different option types.
*
* @property configToValue - parses a string from the user config to a value of the declared type.
* @property valueToGui - sets the GUI control to display the given value.
* @property guiToValue - returns the value of the GUI control.
* @property guiSetter - event name that should be considered a value change of the GUI control.
* @property initGUI - sets properties of the GUI control that are independent of the current value.
* @property sanitizeValue - Displays a visual clue if the entered value is invalid and returns a sane value.
* @property tooltip - appends a custom tooltip to the given option description depending on the current value.
*/
var g_OptionType = {
"boolean":
{
"configToValue": config => config == "true", // Only the exact string "true" maps to true.
"valueToGui": (value, control) => {
control.checked = value;
},
"guiToValue": control => control.checked,
"guiSetter": "onPress"
},
"string":
{
"configToValue": value => value,
"valueToGui": (value, control) => {
control.caption = value;
},
"guiToValue": control => control.caption,
"guiSetter": "onTextEdit"
},
"color":
{
"configToValue": value => value,
"valueToGui": (value, control) => {
control.caption = value;
},
"initGUI": (option, control) => {
control.children[2].onPress = () => {
colorMixer(
control.caption,
(color) => {
if (color != control.caption) // Only fire the edit handler when the mixer returned a different color.
{
control.caption = color;
control.onTextEdit();
}
}
);
};
},
"guiToValue": control => control.caption,
"guiSetter": "onTextEdit",
"sanitizeValue": (value, control, option) => {
let color = guiToRgbColor(value);
let sanitized = rgbToGuiColor(color); // Round-trip through the color helpers; a mismatch with the input marks it invalid below.
if (control)
{
control.sprite = sanitized == value ? "ModernDarkBoxWhite" : "ModernDarkBoxWhiteInvalid";
control.children[1].sprite = sanitized == value ? "color:" + value : "color:" + g_InsaneColor;
}
return sanitized;
},
"tooltip": (value, option) =>
sprintf(translate("Default: %(value)s"), {
"value": Engine.ConfigDB_GetValue("default", option.config)
})
},
"number":
{
"configToValue": value => value,
"valueToGui": (value, control) => {
control.caption = value;
},
"guiToValue": control => control.caption,
"guiSetter": "onTextEdit",
"sanitizeValue": (value, control, option) => {
let sanitized =
Math.min(option.max !== undefined ? option.max : +Infinity,
Math.max(option.min !== undefined ? option.min : -Infinity,
isNaN(+value) ? 0 : value)); // Non-numeric input falls back to 0 before clamping to [min, max].
if (control)
control.sprite = sanitized == value ? "ModernDarkBoxWhite" : "ModernDarkBoxWhiteInvalid"; // Flag the control when the entered value had to be changed.
return sanitized;
},
"tooltip": (value, option) =>
sprintf(
option.min !== undefined && option.max !== undefined ?
translateWithContext("option number", "Min: %(min)s, Max: %(max)s") :
option.min !== undefined && option.max === undefined ?
translateWithContext("option number", "Min: %(min)s") :
option.min === undefined && option.max !== undefined ?
translateWithContext("option number", "Max: %(max)s") :
"", // Neither bound is defined: the format string is empty.
{
"min": option.min,
"max": option.max
})
},
"dropdown":
{
"configToValue": value => value,
"valueToGui": (value, control) => {
control.selected = control.list_data.indexOf(value);
},
"guiToValue": control => control.list_data[control.selected],
"guiSetter": "onSelectionChange",
"initGUI": (option, control) => {
control.list = option.list.map(e => e.label);
control.list_data = option.list.map(e => e.value);
control.onHoverChange = () => {
let item = option.list[control.hovered];
control.tooltip = item && item.tooltip || option.tooltip; // Prefer the hovered item's tooltip, else the option's.
};
}
},
"dropdownNumber":
{
"configToValue": value => +value,
"valueToGui": (value, control) => {
control.selected = control.list_data.indexOf("" + value); // Value is stringified for the lookup; presumably list_data holds strings - confirm.
},
"guiToValue": control => +control.list_data[control.selected],
"guiSetter": "onSelectionChange",
"initGUI": (option, control) => {
control.list = option.list.map(e => e.label);
control.list_data = option.list.map(e => e.value);
control.onHoverChange = () => {
const item = option.list[control.hovered];
control.tooltip = item && item.tooltip || option.tooltip; // Prefer the hovered item's tooltip, else the option's.
};
},
"timeout": (option, oldValue, hasChanges, newValue) => { // Optional hook; does nothing unless the option declares a timeout.
if (!option.timeout)
return;
timedConfirmation(
500, 200,
translate("Changes will be reverted in %(time)s seconds. Do you want to keep changes?"),
"time",
option.timeout,
translate("Warning"),
[translate("No"), translate("Yes")],
[() => {this.revertChange(option, +oldValue, hasChanges);}, null] // NOTE(review): arrow functions keep the enclosing 'this', so revertChange is not looked up on this entry - confirm it resolves as intended.
);
}
},
"slider":
{
"configToValue": value => +value,
"valueToGui": (value, control) => {
control.value = +value;
},
"guiToValue": control => control.value,
"guiSetter": "onValueChange",
"initGUI": (option, control) => {
control.max_value = option.max;
control.min_value = option.min;
},
"tooltip": (value, option) =>
sprintf(translateWithContext("slider number", "Value: %(val)s (min: %(min)s, max: %(max)s)"), {
"val": value.toFixed(2), // All three numbers are shown with two decimals.
"min": option.min.toFixed(2),
"max": option.max.toFixed(2)
})
}
};
/**
 * Entry point of the options page.
 *
 * Takes the changed-key set and the selected tab category from hotloadData
 * when it is given; otherwise starts with an empty set and category 0.
 * Then reads the option definitions, translates their "label" and "tooltip"
 * keys, freezes them and passes them to placeTabButtons.
 *
 * @param {*} data - Not read by this function.
 * @param {Object} [hotloadData] - Object with the properties produced by getHotloadData().
 */
function init(data, hotloadData)
{
	if (hotloadData)
	{
		g_ChangedKeys = hotloadData.changedKeys;
		g_TabCategorySelected = hotloadData.tabCategorySelected;
	}
	else
	{
		g_ChangedKeys = new Set();
		g_TabCategorySelected = 0;
	}

	// Translation happens before the definitions are frozen.
	g_Options = Engine.ReadJSONFile("gui/options/options.json");
	translateObjectKeys(g_Options, ["label", "tooltip"]);
	deepfreeze(g_Options);

	placeTabButtons(g_Options, false, g_TabButtonHeight, g_TabButtonDist, selectPanel, displayOptions);
}
/**
 * Collects the page state that init() reads back from its hotloadData argument.
 *
 * @returns {Object} The selected tab category and the set of changed config keys.
 */
function getHotloadData()
{
	const state = {};
	state.tabCategorySelected = g_TabCategorySelected;
	state.changedKeys = g_ChangedKeys;
	return state;
}
/**
 * Sets up labels and controls of all options of the currently selected category.
 *
 * Every row is hidden first. Then, for each option of the category, the row is
 * positioned, the control is filled with the current "user" config value, and a
 * change handler is installed that writes edits back to the "user" config.
 */
function displayOptions()
{
	// Hide every row together with all of its child controls.
	for (const row of Engine.GetGUIObjectByName("option_controls").children)
	{
		row.hidden = true;
		for (const child of row.children)
			child.hidden = true;
	}

	g_Options[g_TabCategorySelected].options.forEach((option, i) => {
		// Place the row below the previous one and reveal it.
		const row = Engine.GetGUIObjectByName("option_control[" + i + "]");
		const rowSize = row.size;
		rowSize.top = g_OptionControlOffset + i * (g_OptionControlHeight + g_OptionControlDist);
		rowSize.bottom = rowSize.top + g_OptionControlHeight;
		row.size = rowSize;
		row.hidden = false;

		const optionType = g_OptionType[option.type];

		// Option tooltip, followed by the type-specific tooltip line when the type defines one.
		const tooltipFor = value =>
			option.tooltip + (optionType.tooltip ? "\n" + optionType.tooltip(value, option) : "");

		// Current "user" config value converted by the option type.
		const readConfigValue = () =>
			optionType.configToValue(Engine.ConfigDB_GetValue("user", option.config));

		const initialValue = readConfigValue();

		// Set up the control matching the option type.
		const control = Engine.GetGUIObjectByName("option_control_" + option.type + "[" + i + "]");
		control.tooltip = tooltipFor(initialValue);
		control.hidden = false;

		if (optionType.initGUI)
			optionType.initGUI(option, control);

		// A no-op handler is in place while the control is filled from code
		// (presumably so that filling it does not trigger a config write).
		control[optionType.guiSetter] = function() {};
		optionType.valueToGui(initialValue, control);
		if (optionType.sanitizeValue)
			optionType.sanitizeValue(initialValue, control, option);

		control[optionType.guiSetter] = function() {
			const newValue = optionType.guiToValue(control);

			// The sanitized result is discarded here; the raw value is what gets stored.
			if (optionType.sanitizeValue)
				optionType.sanitizeValue(newValue, control, option);

			const oldValue = readConfigValue();
			control.tooltip = tooltipFor(newValue);

			// Sampled before the write below so that the prior state can be handed to the timeout handler.
			const hasChanges = Engine.ConfigDB_HasChanges("user");

			Engine.ConfigDB_CreateValue("user", option.config, String(newValue));
			g_ChangedKeys.add(option.config);
			fireConfigChangeHandlers(new Set([option.config]));

			if (option.timeout)
				optionType.timeout(option, oldValue, hasChanges, newValue);

			if (option.function)
				Engine[option.function](newValue);

			enableButtons();
		};

		// Set up the label; it ends where the control begins.
		const label = Engine.GetGUIObjectByName("option_label[" + i + "]");
		label.caption = option.label;
		label.tooltip = option.tooltip;
		label.hidden = false;

		const labelSize = label.size;
		labelSize.left = option.dependencies ? g_DependentLabelIndentation : 0;
		labelSize.rright = control.size.rleft;
		label.size = labelSize;
	});

	enableButtons();
}
/**
* Enable exactly the option labels and controls whose dependencies are met.
*
* An option without dependencies is always enabled; otherwise every entry of
* its dependency list has to be met. Afterwards the "revertChanges" and
* "saveChanges" buttons are enabled only if the "user" config has changes.
*/
function enableButtons()
{
g_Options[g_TabCategorySelected].options.forEach((option, i) => {
// Evaluates a single entry of option.dependencies against the "user" config.
const isDependencyMet = dependency => {
// A plain string names a config key that has to hold the string "true".
if (typeof dependency === "string")
return Engine.ConfigDB_GetValue("user", dependency) == "true";
else if (typeof dependency === "object")
{
// Comparison operators selectable through dependency.op.
// The relational ones convert both operands with unary plus before comparing.
const availableOps = {
"==": (config, value) => config == value,
- "!=": (config, value) => config != value
+ "!=": (config, value) => config != value,
+ "<": (config, value) => +config < +value,
+ "<=": (config, value) => +config <= +value,
+ ">": (config, value) => +config > +value,
+ ">=": (config, value) => +config >= +value
};
// A missing or unknown operator falls back to "==".
const op = availableOps[dependency.op] || availableOps["=="];
return op(Engine.ConfigDB_GetValue("user", dependency.config), dependency.value);
}
// Neither a string nor an object: report it and treat the dependency as unmet.
error("Unsupported dependency: " + uneval(dependency));
return false;
};
const enabled = !option.dependencies || option.dependencies.every(isDependencyMet);
Engine.GetGUIObjectByName("option_label[" + i + "]").enabled = enabled;
Engine.GetGUIObjectByName("option_control_" + option.type + "[" + i + "]").enabled = enabled;
});
// Reverting and saving are only offered while the "user" config has changes.
const hasChanges = Engine.ConfigDB_HasChanges("user");
Engine.GetGUIObjectByName("revertChanges").enabled = hasChanges;
Engine.GetGUIObjectByName("saveChanges").enabled = hasChanges;
}
/**
 * Opens a warning message box before the options are reset.
 * reallySetDefaults is passed as the handler at the same position as the
 * "Yes" caption; the "No" position has no handler.
 */
function setDefaults()
{
	const question = translate("Resetting the options will erase your saved settings. Do you want to continue?");
	const title = translate("Warning");
	const buttonCaptions = [translate("No"), translate("Yes")];
	const buttonHandlers = [null, reallySetDefaults];

	messageBox(500, 200, question, title, buttonCaptions, buttonHandlers);
}
/**
 * Removes the "user" config value of every option in every category, records
 * each key as changed, saves the "user" config and then calls revertChanges().
 */
function reallySetDefaults()
{
	// g_Options is an array of categories, so iterate its elements with for...of
	// rather than enumerating property keys with for...in.
	for (const category of g_Options)
		for (const option of category.options)
		{
			Engine.ConfigDB_RemoveValue("user", option.config);
			g_ChangedKeys.add(option.config);
		}

	Engine.ConfigDB_SaveChanges("user");
	revertChanges();
}
/**
 * Writes a single option back to a previous value and redraws the options.
 *
 * @param {Object} option - Option definition; its "config" key and optional "function" are read.
 * @param {*} oldValue - Value to restore; stored in the config as a string.
 * @param {boolean} hadChanges - Whether the "user" config already had changes before
 *   the edit being reverted. If not, the changes flag is cleared again.
 */
function revertChange(option, oldValue, hadChanges)
{
	Engine.ConfigDB_CreateValue("user", option.config, String(oldValue));

	if (!hadChanges)
		Engine.ConfigDB_SetChanges("user", false);

	// Call the engine function named by the option, if any, with the restored value.
	const engineFunctionName = option.function;
	if (engineFunctionName)
		Engine[engineFunctionName](oldValue);

	displayOptions();
}
/**
 * Reloads the "user" config, then calls the engine function of every option
 * that names one with the reloaded value, and finally redraws the options.
 */
function revertChanges()
{
	Engine.ConfigDB_Reload("user");

	// g_Options is an array of categories, so iterate its elements with for...of
	// rather than enumerating property keys with for...in.
	for (const category of g_Options)
		for (const option of category.options)
		{
			if (!option.function)
				continue;

			Engine[option.function](
				g_OptionType[option.type].configToValue(
					Engine.ConfigDB_GetValue("user", option.config)));
		}

	displayOptions();
}
/**
 * Saves the "user" config, asking for confirmation first if a value is invalid.
 *
 * Each option whose type defines sanitizeValue is checked. At the first option
 * whose current value differs from its sanitized form, the panel of that
 * category is selected and a warning message box is opened, which passes
 * reallySaveChanges as the handler at the position of the "Yes" caption.
 * If no such option exists, the changes are saved directly.
 */
function saveChanges()
{
	// Iterate by numeric index: for...in would yield string keys, and selectPanel
	// would then receive "1" instead of 1 (init() sets the selected tab to the number 0).
	for (let category = 0; category < g_Options.length; ++category)
		for (const option of g_Options[category].options)
		{
			const optionType = g_OptionType[option.type];
			if (!optionType.sanitizeValue)
				continue;

			// No control is passed, so sanitizeValue only computes the sanitized value.
			const value = optionType.configToValue(Engine.ConfigDB_GetValue("user", option.config));
			if (value == optionType.sanitizeValue(value, undefined, option))
				continue;

			selectPanel(category);
			messageBox(
				500, 200,
				translate("Some setting values are invalid! Are you sure you want to save them?"),
				translate("Warning"),
				[translate("No"), translate("Yes")],
				[null, reallySaveChanges]
			);
			return;
		}

	reallySaveChanges();
}
/**
 * Saves the "user" config without any validation and refreshes the enabled
 * state of the buttons.
 */
function reallySaveChanges()
{
	Engine.ConfigDB_SaveChanges("user");

	enableButtons();
}
/**
 * Close GUI page and inform the parent GUI page which options changed.
 *
 * If the "user" config has unsaved changes, a warning message box is opened
 * first, and closePageWithoutConfirmation is passed as the handler at the
 * position of the "Yes" caption.
 */
function closePage()
{
	// Nothing unsaved: close right away.
	if (!Engine.ConfigDB_HasChanges("user"))
	{
		closePageWithoutConfirmation();
		return;
	}

	const question = translate("You have unsaved changes, do you want to close this window?");
	const title = translate("Warning");
	const buttonCaptions = [translate("No"), translate("Yes")];
	const buttonHandlers = [null, closePageWithoutConfirmation];

	messageBox(500, 200, question, title, buttonCaptions, buttonHandlers);
}
/**
 * Pops this GUI page, passing along the set of config keys that changed.
 */
function closePageWithoutConfirmation()
{
	const changedKeys = g_ChangedKeys;
	Engine.PopGuiPage(changedKeys);
}
Index: ps/trunk/binaries/data/mods/public/gui/options/options.json
===================================================================
--- ps/trunk/binaries/data/mods/public/gui/options/options.json (revision 28009)
+++ ps/trunk/binaries/data/mods/public/gui/options/options.json (revision 28010)
@@ -1,800 +1,829 @@
[
{
"label": "General",
"options":
[
{
"type": "string",
"label": "Player name (single-player)",
"tooltip": "How you want to be addressed in single-player matches.",
"config": "playername.singleplayer"
},
{
"type": "string",
"label": "Player name (multiplayer)",
"tooltip": "How you want to be addressed in multiplayer matches (except lobby).",
"config": "playername.multiplayer"
},
{
"type": "boolean",
"label": "Background pause",
"tooltip": "Pause single-player games when window loses focus.",
"config": "pauseonfocusloss",
"function": "PauseOnFocusLoss"
},
{
"type": "boolean",
"label": "Enable welcome screen",
"tooltip": "If you disable it, the welcome screen will still appear once, each time a new version is available. You can always launch it from the main menu.",
"config": "gui.splashscreen.enable"
},
{
"type": "boolean",
"label": "FPS overlay",
"tooltip": "Show frames per second in top right corner.",
"config": "overlay.fps"
},
{
"type": "boolean",
"label": "Real time overlay",
"tooltip": "Show current system time in top right corner.",
"config": "overlay.realtime"
},
{
"type": "boolean",
"label": "Game time overlay",
"tooltip": "Show current simulation time in top right corner.",
"config": "gui.session.timeelapsedcounter"
},
{
"type": "boolean",
"label": "Ceasefire time overlay",
"tooltip": "Always show the remaining ceasefire time.",
"config": "gui.session.ceasefirecounter"
},
{
"type": "boolean",
"label": "Chat timestamp",
"tooltip": "Display the time at which a chat message was posted.",
"config": "chat.timestamp"
},
{
"type": "dropdown",
"label": "Naming of entities.",
"tooltip": "How to show entity names.",
"config": "gui.session.howtoshownames",
"list": [
{
"value": 0,
"label": "Specific names first",
"tooltip": "Display specific names before generic names."
},
{
"value": 1,
"label": "Generic names first",
"tooltip": "Display generic names before specific names."
},
{
"value": 2,
"label": "Only specific names",
"tooltip": "Display only specific names for entities."
},
{
"value": 3,
"label": "Only generic names",
"tooltip": "Display only generic names for entities."
}
]
}
]
},
{
"label": "Graphics (general)",
"tooltip": "Set the balance between performance and visual appearance.",
"options":
[
{
"type": "boolean",
"label": "Windowed mode",
"tooltip": "Start 0 A.D. in a window.",
"config": "windowed"
},
{
"type": "boolean",
"label": "Fog",
"tooltip": "Enable fog.",
"config": "fog"
},
{
"type": "boolean",
"label": "Post-processing",
"tooltip": "Use screen-space post-processing filters (HDR, Bloom, DOF, etc).",
"config": "postproc"
},
{
+ "type": "dropdownNumber",
+ "label": "Resolution scale",
+ "tooltip": "A smaller scale makes rendering faster but produces a more blurry picture, a large scale makes rendering slower but produces a better picture.",
+ "dependencies": ["postproc"],
+ "config": "renderer.scale",
+ "list": [
+ { "value": 0.5, "label": "50%" },
+ { "value": 0.75, "label": "75%" },
+ { "value": 0.875, "label": "87.5%" },
+ { "value": 1.00, "label": "100%" },
+ { "value": 1.25, "label": "125%" },
+ { "value": 1.50, "label": "150%" },
+ { "value": 1.75, "label": "175%" },
+ { "value": 2.00, "label": "200%" }
+ ]
+ },
+ {
+ "type": "dropdown",
+ "label": "Upscale technique",
+ "tooltip": "Technique defines performance and quality of upscaling process.",
+ "dependencies": ["postproc", { "config": "renderer.scale", "op": "<", "value": 1.0 }],
+ "config": "renderer.upscale.technique",
+ "list": [
+ { "value": "fsr", "label": "FidelityFX Super Resolution 1.0", "tooltip": "Advanced upscale technique. For better results, use FSR with antialiasing enabled. Using it with the OpenGL backend may have some issues, consider using Vulkan backend instead." },
+ { "value": "bilinear", "label": "Bilinear", "tooltip": "Bilinear upscale technique. Produces a slightly blurry picture depending on the scale." },
+ { "value": "pixelated", "label": "Pixelated", "tooltip": "Simplest upscale technique. Used mostly for stylized effect." }
+ ]
+ },
+ {
"type": "boolean",
"label": "Shadows",
"tooltip": "Enable shadows.",
"config": "shadows"
},
{
"type": "boolean",
"label": "Unit silhouettes",
"tooltip": "Show outlines of units behind structures.",
"config": "silhouettes"
},
{
"type": "boolean",
"label": "Particles",
"tooltip": "Enable particles.",
"config": "particles"
},
{
"type": "boolean",
"label": "VSync",
"tooltip": "Run vertical sync to fix screen tearing. REQUIRES GAME RESTART",
"config": "vsync"
},
{
"type": "slider",
"label": "FPS throttling in menus",
"tooltip": "To save CPU workload, throttle render frequency in all menus. Set to maximum to disable throttling.",
"config": "adaptivefps.menu",
"min": 20,
"max": 360
},
{
"type": "slider",
"label": "FPS throttling in games",
"tooltip": "To save CPU workload, throttle render frequency in running games. Set to maximum to disable throttling.",
"config": "adaptivefps.session",
"min": 20,
"max": 360
},
{
"type": "dropdownNumber",
"label": "GUI scale",
"timeout": 5000,
"tooltip": "GUI scale",
"config": "gui.scale",
"function": "SetGUIScale",
"list": [
{ "value": 0.75, "label": "75%" },
{ "value": 1.00, "label": "100%" },
{ "value": 1.25, "label": "125%" },
{ "value": 1.50, "label": "150%" },
{ "value": 1.75, "label": "175%" },
{ "value": 2.00, "label": "200%" },
{ "value": 2.25, "label": "225%" },
{ "value": 2.50, "label": "250%" }
]
},
{
"type": "number",
"label": "Mouse drag",
"tooltip": "Number of pixels the mouse can move before the action is considered a drag.",
"config": "gui.session.dragdelta",
"min": "1",
"max": "200"
},
{
"type": "boolean",
"label": "Mouse grab in fullscreen",
"tooltip": "Constrain mouse in the fullscreen mode to the window boundaries. It's used to avoid mouse going out of a display in case of multiple displays.",
"config": "window.mousegrabinfullscreen"
},
{
"type": "boolean",
"label": "Mouse grab in window mode",
"tooltip": "Constrain mouse in the window mode to the window boundaries.",
"config": "window.mousegrabinwindowmode"
}
]
},
{
"label": "Graphics (advanced)",
"tooltip": "More specific rendering settings.",
"options":
[
{
"type": "dropdown",
"label": "Renderer backend",
"tooltip": "Choose the renderer's backend. REQUIRES GAME RESTART",
"config": "rendererbackend",
"list": [
{ "value": "gl", "label": "OpenGL", "tooltip": "Default OpenGL backend with GLSL. REQUIRES GAME RESTART" },
{ "value": "glarb", "label": "OpenGL ARB", "tooltip": "Legacy OpenGL backend with ARB shaders. REQUIRES GAME RESTART" },
{ "value": "vulkan", "label": "Vulkan", "tooltip": "Modern API, requires up-to-date drivers. REQUIRES GAME RESTART" }
]
},
{
"type": "boolean",
"label": "Fog",
"tooltip": "Enable fog.",
"dependencies": [{ "config": "rendererbackend", "op": "!=", "value": "glarb" }],
"config": "fog"
},
{
"type": "boolean",
"label": "Post-processing",
"tooltip": "Use screen-space post-processing filters (HDR, Bloom, DOF, etc).",
"config": "postproc"
},
{
"type": "dropdown",
"label": "Antialiasing",
"tooltip": "Reduce aliasing effect on edges.",
"dependencies": ["postproc", { "config": "rendererbackend", "op": "!=", "value": "glarb" }],
"config": "antialiasing",
"list": [
{ "value": "disabled", "label": "Disabled", "tooltip": "Do not use antialiasing." },
{ "value": "fxaa", "label": "FXAA", "tooltip": "Fast, but simple antialiasing." },
{ "value": "msaa2", "label": "MSAA (2×)", "tooltip": "Slow, but high-quality antialiasing, uses two samples per pixel. Supported for GL3.3+." },
{ "value": "msaa4", "label": "MSAA (4×)", "tooltip": "Slow, but high-quality antialiasing, uses four samples per pixel. Supported for GL3.3+." },
{ "value": "msaa8", "label": "MSAA (8×)", "tooltip": "Slow, but high-quality antialiasing, uses eight samples per pixel. Supported for GL3.3+." },
{ "value": "msaa16", "label": "MSAA (16×)", "tooltip": "Slow, but high-quality antialiasing, uses sixteen samples per pixel. Supported for GL3.3+." }
]
},
{
"type": "dropdown",
"label": "Sharpening",
"tooltip": "Reduce blurry effects.",
"dependencies": ["postproc", { "config": "rendererbackend", "op": "!=", "value": "glarb" }],
"config": "sharpening",
"list": [
{ "value": "disabled", "label": "Disabled", "tooltip": "Do not use sharpening." },
{ "value": "cas", "label": "FidelityFX CAS", "tooltip": "Contrast adaptive sharpening, a fast, contrast based sharpening pass." }
]
},
{
"type": "slider",
"label": "Sharpness factor",
"tooltip": "The sharpness of the chosen pass.",
"dependencies": [
"postproc",
{ "config": "rendererbackend", "op": "!=", "value": "glarb" },
{ "config": "sharpening", "op": "!=", "value": "disabled" }
],
"config": "sharpness",
"min": 0,
"max": 1
},
{
"type": "dropdown",
"label": "Model quality",
"tooltip": "Model quality setting.",
"config": "max_actor_quality",
"list": [
{ "value": 100, "label": { "_string": "Low", "context": "Option for the meshes' level of detail." }, "tooltip": "Simpler models for better performance." },
{ "value": 150, "label": { "_string": "Medium", "context": "Option for the meshes' level of detail." }, "tooltip": "Average quality and average performance." },
{ "value": 200, "label": { "_string": "High", "context": "Option for the meshes' level of detail." }, "tooltip": "High quality models." }
]
},
{
"type": "dropdown",
"label": "Model appearance randomization",
"tooltip": "Randomize the appearance of entities. Disabling gives a small performance improvement.",
"config": "variant_diversity",
"list": [
{ "value": "none", "label": { "_string": "None", "context": "Option for the meshes' amount of variety." }, "tooltip": "Entities will all look the same." },
{ "value": "limited", "label": { "_string": "Limited", "context": "Option for the meshes' amount of variety." }, "tooltip": "Entities will be less diverse." },
{ "value": "full", "label": { "_string": "Normal", "context": "Option for the meshes' amount of variety." }, "tooltip": "Entities appearance is randomized normally." }
]
},
{
"type": "slider",
"label": "Shader effects",
"tooltip": "Number of shader effects. REQUIRES GAME RESTART",
"config": "materialmgr.quality",
"min": 0,
"max": 10
},
{
"type": "boolean",
"label": "Shadows",
"tooltip": "Enable shadows.",
"config": "shadows"
},
{
"type": "dropdown",
"label": "Quality",
"tooltip": "Shadow map resolution. High values can crash the game when using a graphics card with low memory!",
"dependencies": ["shadows"],
"config": "shadowquality",
"list": [
{ "value": -1, "label": { "_string": "Low", "context": "Option for the shadow quality." } },
{ "value": 0, "label": { "_string": "Medium", "context": "Option for the shadow quality." } },
{ "value": 1, "label": { "_string": "High", "context": "Option for the shadow quality." } },
{ "value": 2, "label": { "_string": "Very High", "context": "Option for the shadow quality." } }
]
},
{
"type": "boolean",
"label": "Filtering",
"tooltip": "Smooth shadows.",
"dependencies": ["shadows"],
"config": "shadowpcf"
},
{
"type": "slider",
"label": "Cutoff distance",
"tooltip": "Hides shadows beyond a certain distance from a camera.",
"dependencies": ["shadows"],
"config": "shadowscutoffdistance",
"min": 100,
"max": 1500
},
{
"type": "boolean",
"label": "Cover whole map",
"tooltip": "When ON shadows cover the whole map and shadows cutoff distance is ignored. Useful for making screenshots of a whole map.",
"dependencies": ["shadows"],
"config": "shadowscovermap"
},
{
"type": "boolean",
"label": "Water effects",
"tooltip": "When OFF, use the lowest settings possible to render water. This makes other settings irrelevant.",
"config": "watereffects"
},
{
"type": "boolean",
"label": "High-quality water effects",
"tooltip": "Use higher-quality effects for water, rendering coastal waves, shore foam, and ships trails.",
"dependencies": ["watereffects"],
"config": "waterfancyeffects"
},
{
"type": "boolean",
"label": "Water reflections",
"tooltip": "Allow water to reflect a mirror image.",
"dependencies": ["watereffects"],
"config": "waterreflection"
},
{
"type": "boolean",
"label": "Water refraction",
"tooltip": "Use a real water refraction map and not transparency.",
"dependencies": ["watereffects"],
"config": "waterrefraction"
},
{
"type": "boolean",
"label": "Real water depth",
"tooltip": "Use actual water depth in rendering calculations.",
"dependencies": ["watereffects", "waterrefraction"],
"config": "waterrealdepth"
},
{
"type": "dropdown",
"label": "Texture quality",
"tooltip": "Decreasing texture quality makes textures blurrier but increases game performance.",
"config": "textures.quality",
"list": [
{ "value": 0, "label": { "_string": "Low", "context": "Option for the texture quality." }, "tooltip": "Low" },
{ "value": 1, "label": { "_string": "Medium", "context": "Option for the texture quality." }, "tooltip": "Medium" },
{ "value": 2, "label": { "_string": "High", "context": "Option for the texture quality." }, "tooltip": "High" }
]
},
{
"type": "dropdown",
"label": "Texture anisotropic filter",
"tooltip": "Makes textures look better, especially terrain. If the anisotropic filter value is unsupported it will be set to the max supported value.",
"config": "textures.maxanisotropy",
"list": [
{ "value": 1, "label": "1x", "tooltip": "Disabled" },
{ "value": 2, "label": "2x", "tooltip": "2x" },
{ "value": 4, "label": "4x", "tooltip": "4x" },
{ "value": 8, "label": "8x", "tooltip": "8x" },
{ "value": 16, "label": "16x", "tooltip": "16x" }
]
}
]
},
{
"label": "Sound",
"options":
[
{
"type": "slider",
"label": "Master volume",
"tooltip": "Master audio gain.",
"config": "sound.mastergain",
"function": "SetMasterGain",
"min": 0,
"max": 2
},
{
"type": "slider",
"label": "Music volume",
"tooltip": "In game music gain.",
"config": "sound.musicgain",
"function": "SetMusicGain",
"min": 0,
"max": 2
},
{
"type": "slider",
"label": "Ambient volume",
"tooltip": "In game ambient sound gain.",
"config": "sound.ambientgain",
"function": "SetAmbientGain",
"min": 0,
"max": 2
},
{
"type": "slider",
"label": "Action volume",
"tooltip": "In game unit action sound gain.",
"config": "sound.actiongain",
"function": "SetActionGain",
"min": 0,
"max": 2
},
{
"type": "slider",
"label": "UI volume",
"tooltip": "UI sound gain.",
"config": "sound.uigain",
"function": "SetUIGain",
"min": 0,
"max": 2
},
{
"type": "boolean",
"label": "Nick notification",
"tooltip": "Receive audio notification when someone types your nick.",
"config": "sound.notify.nick"
},
{
"type": "boolean",
"label": "New player notification in game setup",
"tooltip": "Receive audio notification when a new client joins the game setup.",
"config": "sound.notify.gamesetup.join"
}
]
},
{
"label": "Game Setup",
"options":
[
{
"type": "boolean",
"label": "Enable game setting tips",
"tooltip": "Show tips when setting up a game.",
"config": "gui.gamesetup.enabletips"
},
{
"type": "boolean",
"label": "Enable settings panel slide",
"tooltip": "Slide the settings panel when opening, closing or resizing.",
"config": "gui.gamesetup.settingsslide"
},
{
"type": "boolean",
"label": "Persist match settings",
"tooltip": "Save and restore match settings for quick reuse when hosting another game.",
"config": "persistmatchsettings"
},
{
"type": "dropdown",
"label": "Default AI difficulty",
"tooltip": "Default difficulty of the AI.",
"config": "gui.gamesetup.aidifficulty",
"list": [
{ "value": 0, "label": { "_string": "Sandbox", "context": "Option for the AI difficulty." }},
{ "value": 1, "label": { "_string": "Very Easy", "context": "Option for the AI difficulty." }},
{ "value": 2, "label": { "_string": "Easy", "context": "Option for the AI difficulty." }},
{ "value": 3, "label": { "_string": "Medium", "context": "Option for the AI difficulty." }},
{ "value": 4, "label": { "_string": "Hard", "context": "Option for the AI difficulty." }},
{ "value": 5, "label": { "_string": "Very Hard", "context": "Option for the AI difficulty." }}
]
},
{
"type": "dropdown",
"label": "Default AI behavior",
"tooltip": "Default behavior of the AI.",
"config": "gui.gamesetup.aibehavior",
"list": [
{ "value": "random", "label": "Random" },
{ "value": "balanced", "label": "Balanced" },
{ "value": "aggressive", "label": "Aggressive" },
{ "value": "defensive", "label": "Defensive" }
]
},
{
"type": "dropdown",
"label": "Assign players",
"tooltip": "Automatically assign joining clients to free player slots during the match setup.",
"config": "gui.gamesetup.assignplayers",
"list": [
{
"value": "everyone",
"label": "Everyone",
"tooltip": "Players joining the match will be assigned if there is a free slot."
},
{
"value": "buddies",
"label": "Buddies",
"tooltip": "Players joining the match will only be assigned if they are a buddy of the host and if there is a free slot."
},
{
"value": "disabled",
"label": "Disabled",
"tooltip": "Players only receive a slot when the host assigns them explicitly."
}
]
}
]
},
{
"label": "Networking / Lobby",
"tooltip": "These settings only affect the multiplayer.",
"options":
[
{
"type": "boolean",
"label": "TLS encryption",
"tooltip": "Protect login and data exchanged with the lobby server using TLS encryption.",
"config": "lobby.tls"
},
{
"type": "number",
"label": "Chat backlog",
"tooltip": "Number of backlogged messages to load when joining the lobby.",
"config": "lobby.history",
"min": "0"
},
{
"type": "boolean",
"label": "Game rating column",
"tooltip": "Show the average rating of the participating players in a column of the gamelist.",
"config": "lobby.columns.gamerating"
},
{
"type": "boolean",
"label": "Network warnings",
"tooltip": "Show which player has a bad connection in multiplayer games.",
"config": "overlay.netwarnings"
},
{
"type": "dropdown",
"label": "Late observer joins",
"tooltip": "Allow everybody or buddies only to join the game as observer after it started.",
"config": "network.lateobservers",
"list": [
{ "value": "everyone", "label": "Everyone" },
{ "value": "buddies", "label": "Buddies" },
{ "value": "disabled", "label": "Disabled" }
]
},
{
"type": "number",
"label": "Observer limit",
"tooltip": "Prevent further observers from joining if the limit is reached.",
"config": "network.observerlimit",
"min": 0,
"max": 32
},
{
"type": "number",
"label": "Max lag for observers",
"tooltip": "When hosting, pause the game if observers are lagging more than this many turns. If set to -1, observers are ignored.",
"config": "network.observermaxlag",
"min": -1,
"max": 10000
},
{
"type": "boolean",
"label": "(Observer) Speed up when lagging.",
"tooltip": "When observing a game, automatically speed up if you start lagging, to catch up with the live match.",
"config": "network.autocatchup"
}
]
},
{
"label": "Game Session",
"tooltip": "Change options regarding the in-game settings.",
"options":
[
{
"type": "slider",
"label": "Wounded unit health",
"tooltip": "The wounded unit hotkey considers the selected units as wounded if their health percentage falls below this number.",
"config": "gui.session.woundedunithotkeythreshold",
"min": 0,
"max": 100
},
{
"type": "number",
"label": "Batch training size",
"tooltip": "Number of units trained per batch by default.",
"config": "gui.session.batchtrainingsize",
"min": 1,
"max": 20
},
{
"type": "slider",
"label": "Scroll batch increment ratio",
"tooltip": "Number of times you have to scroll to increase/decrease the batchsize by 1.",
"config": "gui.session.scrollbatchratio",
"min": 0.1,
"max": 30
},
{
"type": "slider",
"label": "Flare display duration",
"tooltip": "How long the flare markers on the minimap are displayed in seconds.",
"config": "gui.session.flarelifetime",
"min": 0,
"max": 60
},
{
"type": "boolean",
"label": "Minimap icons",
"tooltip": "Show special icons for some entities on the minimap.",
"config": "gui.session.minimap.icons.enabled"
},
{
"type": "boolean",
"label": "Chat notification attack",
"tooltip": "Show a chat notification if you are attacked by another player.",
"config": "gui.session.notifications.attack"
},
{
"type": "boolean",
"label": "Chat notification tribute",
"tooltip": "Show a chat notification if an ally tributes resources to another team member if teams are locked, and all tributes in observer mode.",
"config": "gui.session.notifications.tribute"
},
{
"type": "boolean",
"label": "Chat notification barter",
"tooltip": "Show a chat notification to observers when a player bartered resources.",
"config": "gui.session.notifications.barter"
},
{
"type": "dropdown",
"label": "Chat notification phase",
"tooltip": "Show a chat notification if you or an ally have started, aborted or completed a new phase, and phases of all players in observer mode.",
"config": "gui.session.notifications.phase",
"list": [
{ "value": "none", "label": "Disable" },
{ "value": "completed", "label": "Completed" },
{ "value": "all", "label": "All displayed" }
]
},
{
"type": "boolean",
"label": "Attack range visualization",
"tooltip": "Display the attack range of selected defensive structures. (It can also be toggled with the hotkey during a game).",
"config": "gui.session.attackrange"
},
{
"type": "boolean",
"label": "Aura range visualization",
"tooltip": "Display the range of auras of selected units and structures. (It can also be toggled with the hotkey during a game).",
"config": "gui.session.aurasrange"
},
{
"type": "boolean",
"label": "Heal range visualization",
"tooltip": "Display the healing range of selected units. (It can also be toggled with the hotkey during a game).",
"config": "gui.session.healrange"
},
{
"type": "boolean",
"label": "Rank icon above status bar",
"tooltip": "Show rank icons above status bars.",
"config": "gui.session.rankabovestatusbar"
},
{
"type": "boolean",
"label": "Experience status bar",
"tooltip": "Show an experience status bar above each selected unit.",
"config": "gui.session.experiencestatusbar"
},
{
"type": "boolean",
"label": "Detailed tooltips",
"tooltip": "Show detailed tooltips for trainable units in unit-producing structures.",
"config": "showdetailedtooltips"
},
{
"type": "dropdown",
"label": "Sort resources and population tooltip",
"tooltip": "Dynamically sort players in the resources and population tooltip by value.",
"config": "gui.session.respoptooltipsort",
"list": [
{ "value": 0, "label": "Unordered" },
{ "value": -1, "label": "Ascending" },
{ "value": 1, "label": "Descending" }
]
},
{
"type": "color",
"label": "Diplomacy colors: self",
"tooltip": "Color of your units when diplomacy colors are enabled.",
"config": "gui.session.diplomacycolors.self"
},
{
"type": "color",
"label": "Diplomacy colors: ally",
"tooltip": "Color of allies when diplomacy colors are enabled.",
"config": "gui.session.diplomacycolors.ally"
},
{
"type": "color",
"label": "Diplomacy colors: neutral",
"tooltip": "Color of neutral players when diplomacy colors are enabled.",
"config": "gui.session.diplomacycolors.neutral"
},
{
"type": "color",
"label": "Diplomacy colors: enemy",
"tooltip": "Color of enemies when diplomacy colors are enabled.",
"config": "gui.session.diplomacycolors.enemy"
},
{
"type": "dropdown",
"label": "Snap to edges",
"tooltip": "This option allows aligning new structures with nearby structures.",
"config": "gui.session.snaptoedges",
"list": [
{
"value": "disabled",
"label": "Hotkey to enable snapping",
"tooltip": "New structures are aligned with nearby structures while pressing the hotkey."
},
{
"value": "enabled",
"label": "Hotkey to disable snapping",
"tooltip": "New structures are aligned with nearby structures unless the hotkey is pressed."
}
]
},
{
"type": "dropdown",
"label": "Control group membership",
"tooltip": "Decide whether units can be part of multiple control groups.",
"config": "gui.session.disjointcontrolgroups",
"list": [
{
"value": "true",
"label": "Single",
"tooltip": "When adding a Unit or Structure to a control group, they are removed from other control groups. Use this choice if you want control groups to refer to distinct armies."
},
{
"value": "false",
"label": "Multiple",
"tooltip": "Units and Structures can be part of multiple control groups. This is useful to keep control groups for distinct armies and a control group for the entire army simultaneously."
}
]
},
{
"type": "dropdown",
"label": "Formation control",
"tooltip": "Decide whether formations are enabled for all orders or only 'Walk' and 'Patrol'.",
"config": "gui.session.formationwalkonly",
"list": [
{
"value": "true",
"label": "Walk/Patrol Only",
"tooltip": "Other orders will disband existing formations."
},
{
"value": "false",
"label": "No override",
"tooltip": "Units in formations stay in formations."
}
]
},
{
"type": "boolean",
"label": "Battalion-style formations",
"tooltip": "Whether formations are selected as a whole.",
"config": "gui.session.selectformationasone"
}
]
}
]
Index: ps/trunk/source/graphics/ShaderManager.h
===================================================================
--- ps/trunk/source/graphics/ShaderManager.h (revision 28009)
+++ ps/trunk/source/graphics/ShaderManager.h (revision 28010)
@@ -1,147 +1,148 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#ifndef INCLUDED_SHADERMANAGER
#define INCLUDED_SHADERMANAGER
#include "graphics/ShaderDefines.h"
#include "graphics/ShaderProgram.h"
#include "graphics/ShaderTechnique.h"
+#include "renderer/backend/IDevice.h"
#include "renderer/backend/PipelineState.h"
#include
#include
#include
#include
/**
* Shader manager: loads and caches shader programs.
*
* For a high-level overview of shaders and materials, see
* http://trac.wildfiregames.com/wiki/MaterialSystem
*/
class CShaderManager
{
public:
// Creates a manager for the given backend device.
// NOTE(review): presumably the pointer is kept in m_Device and must outlive
// the manager - confirm in ShaderManager.cpp.
CShaderManager(Renderer::Backend::IDevice* device);
~CShaderManager();
/**
* Load a shader effect.
* Effects can be implemented via many techniques; this returns the best usable technique.
* @param name name of effect XML specification (file is loaded from shaders/effects/${name}.xml)
* @param defines key/value set of preprocessor definitions
* @return loaded technique, or empty technique on error
*/
CShaderTechniquePtr LoadEffect(CStrIntern name, const CShaderDefines& defines);
/**
* Load a shader effect, with empty defines.
*/
CShaderTechniquePtr LoadEffect(CStrIntern name);
/**
* Load a shader effect, passing a callback that can overwrite the pipeline
* state description.
* TODO: we should set all needed states in XML.
*/
using PipelineStateDescCallback = CShaderTechnique::PipelineStateDescCallback;
CShaderTechniquePtr LoadEffect(
CStrIntern name, const CShaderDefines& defines, const PipelineStateDescCallback& callback);
/**
* Returns the number of shader effects that are currently loaded.
*/
size_t GetNumEffectsLoaded() const;
private:
// Key of the program cache: shader name plus its defines.
struct CacheKey
{
std::string name;
CShaderDefines defines;
// Lexicographic ordering: compare the names first and only fall back
// to the defines when the names are equal.
bool operator<(const CacheKey& k) const
{
if (name < k.name) return true;
if (k.name < name) return false;
return defines < k.defines;
}
};
// Backend device passed to the constructor (non-owning raw pointer).
Renderer::Backend::IDevice* m_Device = nullptr;
// A CShaderProgram contains expensive backend state, so we ought to cache it.
// The compiled state depends solely on the filename and list of defines,
// so we store that in CacheKey.
// TODO: is this cache useful when we already have an effect cache?
std::map m_ProgramCache;
/**
* Key for effect cache lookups.
* This stores two separate CShaderDefines because the renderer typically
* has one set from the rendering context and one set from the material;
* by handling both separately here, we avoid the cost of having to merge
* the two sets into a single one before doing the cache lookup.
*
* NOTE(review): the struct below only holds a single CShaderDefines, so the
* description above looks outdated - confirm and update.
*/
struct EffectCacheKey
{
CStrIntern name;
CShaderDefines defines;
bool operator==(const EffectCacheKey& b) const;
};
// Hash functor for EffectCacheKey, used by the unordered effect cache map.
struct EffectCacheKeyHash
{
size_t operator()(const EffectCacheKey& key) const;
};
using EffectCacheMap = std::unordered_map;
EffectCacheMap m_EffectCache;
// Store the set of shaders that need to be reloaded when the given file is modified
template
using HotloadFilesMap = std::unordered_map<
VfsPath,
std::set, std::owner_less>>>;
HotloadFilesMap m_HotloadTechniques;
HotloadFilesMap m_HotloadPrograms;
/**
* Load a shader program.
* @param name name of shader XML specification (file is loaded from shaders/${name}.xml)
* @param defines key/value set of preprocessor definitions
* @return loaded program, or null pointer on error
*/
CShaderProgramPtr LoadProgram(const CStr& name, const CShaderDefines& defines);
// NOTE(review): presumably (re)loads the contents of the given technique and
// returns whether that succeeded - confirm in ShaderManager.cpp.
bool LoadTechnique(CShaderTechniquePtr& tech);
// File-change notification entry points; the static variant takes an opaque
// param (presumably the CShaderManager instance - confirm) and a path.
static Status ReloadChangedFileCB(void* param, const VfsPath& path);
Status ReloadChangedFile(const VfsPath& path);
/**
* Associates the file with the technique to be reloaded if the file has changed.
*/
void AddTechniqueFileDependency(const CShaderTechniquePtr& technique, const VfsPath& path);
/**
* Associates the file with the program to be reloaded if the file has changed.
*/
void AddProgramFileDependency(const CShaderProgramPtr& program, const VfsPath& path);
};
#endif // INCLUDED_SHADERMANAGER
Index: ps/trunk/source/graphics/ShaderTechnique.h
===================================================================
--- ps/trunk/source/graphics/ShaderTechnique.h (revision 28009)
+++ ps/trunk/source/graphics/ShaderTechnique.h (revision 28010)
@@ -1,102 +1,111 @@
-/* Copyright (C) 2022 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#ifndef INCLUDED_SHADERTECHNIQUE
#define INCLUDED_SHADERTECHNIQUE
#include "graphics/ShaderDefines.h"
#include "graphics/ShaderProgram.h"
#include "graphics/ShaderTechniquePtr.h"
#include "lib/code_annotation.h"
#include "lib/file/vfs/vfs_path.h"
#include "renderer/backend/PipelineState.h"
#include
#include
#include
/**
* Implements a render pass consisting of a pipeline state and a shader,
* used by CShaderTechnique.
*/
class CShaderPass
{
public:
// Builds a pass from a pipeline state (passed by value as a unique_ptr,
// i.e. ownership is handed over) and a shader program.
CShaderPass(
std::unique_ptr pipelineState,
const CShaderProgramPtr& shader);
// NOTE(review): MOVABLE comes from lib/code_annotation.h; presumably it
// declares the move operations for this class - confirm.
MOVABLE(CShaderPass);
// Returns the shader program of this pass.
const CShaderProgramPtr& GetShaderProgram() const noexcept { return m_Shader; }
// Returns a non-owning pointer to the pipeline state held by this pass.
Renderer::Backend::IGraphicsPipelineState*
GetPipelineState() const noexcept { return m_PipelineState.get(); }
private:
CShaderProgramPtr m_Shader;
std::unique_ptr m_PipelineState;
};
/**
* Implements a render technique consisting of a sequence of passes.
* CShaderManager loads these from shader effect XML files.
*/
class CShaderTechnique
{
public:
// Callback type used to adjust a pipeline state description
// (the exact signature is declared here; see CShaderManager::LoadEffect).
using PipelineStateDescCallback =
std::function;
// The path, defines and callback are kept so the technique can be reloaded
// later (see the members at the bottom of the class).
CShaderTechnique(const VfsPath& path, const CShaderDefines& defines, const PipelineStateDescCallback& callback);
// Replaces the list of graphics passes of this technique.
void SetPasses(std::vector&& passes);
// Hands over a compute pipeline state together with its compute shader.
+ void SetComputePipelineState(
+ std::unique_ptr pipelineState,
+ const CShaderProgramPtr& computeShader);
int GetNumPasses() const;
// NOTE(review): callers in PostprocManager.cpp also call GetShader() on
// compute techniques, so it presumably returns the compute shader when one
// is set - confirm in ShaderTechnique.cpp.
Renderer::Backend::IShaderProgram* GetShader(int pass = 0) const;
Renderer::Backend::IGraphicsPipelineState*
GetGraphicsPipelineState(int pass = 0) const;
// Returns the compute pipeline state (non-owning pointer).
+ Renderer::Backend::IComputePipelineState*
+ GetComputePipelineState() const;
+
/**
* Whether this technique uses alpha blending that requires objects to be
* drawn from furthest to nearest.
*/
bool GetSortByDistance() const;
void SetSortByDistance(bool enable);
// Accessors for the data needed to reload the technique.
const VfsPath& GetPath() { return m_Path; }
const CShaderDefines& GetShaderDefines() { return m_Defines; }
const PipelineStateDescCallback& GetPipelineStateDescCallback() const { return m_PipelineStateDescCallback; };
private:
std::vector m_Passes;
bool m_SortByDistance = false;
// We need additional data to reload the technique.
VfsPath m_Path;
CShaderDefines m_Defines;
PipelineStateDescCallback m_PipelineStateDescCallback;
+
// Compute pipeline state and shader set through SetComputePipelineState().
+ std::unique_ptr m_ComputePipelineState;
+ CShaderProgramPtr m_ComputeShader;
};
#endif // INCLUDED_SHADERTECHNIQUE
Index: ps/trunk/source/renderer/PostprocManager.cpp
===================================================================
--- ps/trunk/source/renderer/PostprocManager.cpp (revision 28009)
+++ ps/trunk/source/renderer/PostprocManager.cpp (revision 28010)
@@ -1,697 +1,959 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#include "precompiled.h"
#include "renderer/PostprocManager.h"
#include "graphics/GameView.h"
#include "graphics/LightEnv.h"
#include "graphics/ShaderManager.h"
#include "lib/bits.h"
#include "maths/MathUtil.h"
#include "ps/ConfigDB.h"
#include "ps/CLogger.h"
#include "ps/CStrInternStatic.h"
#include "ps/Filesystem.h"
#include "ps/Game.h"
#include "ps/World.h"
#include "renderer/backend/IDevice.h"
#include "renderer/Renderer.h"
#include "renderer/RenderingOptions.h"
#include "tools/atlas/GameInterface/GameLoop.h"
#include
namespace
{
/**
 * Draws two triangles (6 vertices) covering the [-1, 1] x [-1, 1] square.
 * Positions are uploaded to vertex buffer slot 0 and texture coordinates to
 * slot 1; the V texture coordinate is mirrored on the Vulkan backend.
 */
void DrawFullscreenQuad(
	Renderer::Backend::IVertexInputLayout* vertexInputLayout,
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
	const bool isVulkan =
		deviceCommandContext->GetDevice()->GetBackend() == Renderer::Backend::Backend::VULKAN;
	const float vTop = isVulkan ? 0.0f : 1.0f;
	const float vBottom = isVulkan ? 1.0f : 0.0f;

	float positions[] =
	{
		1.0f, 1.0f,
		-1.0f, 1.0f,
		-1.0f, -1.0f,

		-1.0f, -1.0f,
		1.0f, -1.0f,
		1.0f, 1.0f
	};

	// Same vertex order as the positions above.
	float texCoords[] =
	{
		1.0f, vTop,
		0.0f, vTop,
		0.0f, vBottom,

		0.0f, vBottom,
		1.0f, vBottom,
		1.0f, vTop
	};

	deviceCommandContext->SetVertexInputLayout(vertexInputLayout);
	deviceCommandContext->SetVertexBufferData(0, positions, sizeof(positions));
	deviceCommandContext->SetVertexBufferData(1, texCoords, sizeof(texCoords));
	deviceCommandContext->Draw(0, 6);
}
} // anonymous namespace
/**
 * Stores the backend device and sets the default state. No textures or
 * framebuffers are created here; that is done by Initialize().
 */
CPostprocManager::CPostprocManager(Renderer::Backend::IDevice* device)
	: m_Device(device),
	m_IsInitialized(false),
	m_PostProcEffect(L"default"),
	m_WhichBuffer(true),
	m_Sharpness(0.3f),
	m_UsingMultisampleBuffer(false),
	m_MultisampleCount(0)
{
}
// Releases the postprocessing buffers through Cleanup(), which does nothing
// when the manager was never initialized.
CPostprocManager::~CPostprocManager()
{
	Cleanup();
}
bool CPostprocManager::IsEnabled() const
{
const bool isDepthStencilFormatPresent =
m_Device->GetPreferredDepthStencilFormat(
Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT, true, true)
!= Renderer::Backend::Format::UNDEFINED;
return
g_RenderingOptions.GetPostProc() &&
m_Device->GetBackend() != Renderer::Backend::Backend::GL_ARB &&
isDepthStencilFormatPresent;
}
void CPostprocManager::Cleanup()
{
if (!m_IsInitialized) // Only cleanup if previously used
return;
m_CaptureFramebuffer.reset();
m_PingFramebuffer.reset();
m_PongFramebuffer.reset();
m_ColorTex1.reset();
m_ColorTex2.reset();
m_DepthTex.reset();
for (BlurScale& scale : m_BlurScales)
{
for (BlurScale::Step& step : scale.steps)
{
step.framebuffer.reset();
step.texture.reset();
}
}
}
// One-time setup: builds the vertex input layout of the fullscreen quad,
// collects the usable multisample counts, creates the buffers and loads the
// anti-aliasing, sharpening, upscale and (with compute shaders) downscale
// techniques. Returns immediately if the manager is already initialized.
void CPostprocManager::Initialize()
{
if (m_IsInitialized)
return;
// Two vertex attributes of two floats each: POSITION and UV0. The trailing
// 0 / 1 is presumably the vertex buffer slot, matching the slots filled by
// DrawFullscreenQuad - confirm against the attribute struct.
const std::array attributes{{
{Renderer::Backend::VertexAttributeStream::POSITION,
Renderer::Backend::Format::R32G32_SFLOAT, 0, sizeof(float) * 2,
Renderer::Backend::VertexAttributeRate::PER_VERTEX, 0},
{Renderer::Backend::VertexAttributeStream::UV0,
Renderer::Backend::Format::R32G32_SFLOAT, 0, sizeof(float) * 2,
Renderer::Backend::VertexAttributeRate::PER_VERTEX, 1},
}};
m_VertexInputLayout = g_Renderer.GetVertexInputLayout(attributes);
// Keep only the candidate sample counts that do not exceed the device limit.
const uint32_t maxSamples = m_Device->GetCapabilities().maxSampleCount;
const uint32_t possibleSampleCounts[] = {2, 4, 8, 16};
std::copy_if(
std::begin(possibleSampleCounts), std::end(possibleSampleCounts),
std::back_inserter(m_AllowedSampleCounts),
[maxSamples](const uint32_t sampleCount) { return sampleCount <= maxSamples; } );
// The screen size starts out correct and then must be updated with Resize()
- m_Width = g_Renderer.GetWidth();
- m_Height = g_Renderer.GetHeight();
+ RecalculateSize(g_Renderer.GetWidth(), g_Renderer.GetHeight());
RecreateBuffers();
m_IsInitialized = true;
// Once we have initialised the buffers, we can update the techniques.
UpdateAntiAliasingTechnique();
UpdateSharpeningTechnique();
UpdateSharpnessFactor();
// The upscale technique name is read from the "renderer.upscale.technique"
// config value.
+ CStr upscaleName;
+ CFG_GET_VAL("renderer.upscale.technique", upscaleName);
+ SetUpscaleTechnique(upscaleName);
// This might happen after the map is loaded and the effect chosen
SetPostEffect(m_PostProcEffect);
+
// The compute downscale effect is only loaded when the device supports
// compute shaders; otherwise m_DownscaleComputeTech is left untouched.
+ if (m_Device->GetCapabilities().computeShaders)
+ m_DownscaleComputeTech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern("compute_downscale"));
}
// Recomputes the sizes from the current renderer width and height and, when
// the manager is initialized, recreates the buffers for them.
void CPostprocManager::Resize()
{
- m_Width = g_Renderer.GetWidth();
- m_Height = g_Renderer.GetHeight();
+ RecalculateSize(g_Renderer.GetWidth(), g_Renderer.GetHeight());
// If the buffers were initialized, recreate them to the new size.
if (m_IsInitialized)
RecreateBuffers();
}
// Drops the existing buffers via Cleanup() and creates them again for the
// current sizes: the two ping-pong colour textures, the optional textures and
// framebuffers at the unscaled size, the blur chain, the depth-stencil texture
// and the capture/ping/pong framebuffers. Disables the postproc rendering
// option when one of the capture/ping/pong framebuffers cannot be created.
void CPostprocManager::RecreateBuffers()
{
Cleanup();
// Helper creating an RGBA8 texture of the given size with a linear,
// clamp-to-edge sampler; it is undefined again after the blur chain setup.
#define GEN_BUFFER_RGBA(name, w, h) \
name = m_Device->CreateTexture2D( \
"PostProc" #name, \
Renderer::Backend::ITexture::Usage::SAMPLED | \
Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT | \
Renderer::Backend::ITexture::Usage::TRANSFER_SRC | \
Renderer::Backend::ITexture::Usage::TRANSFER_DST, \
Renderer::Backend::Format::R8G8B8A8_UNORM, w, h, \
Renderer::Backend::Sampler::MakeDefaultSampler( \
Renderer::Backend::Sampler::Filter::LINEAR, \
Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
// Two fullscreen ping-pong textures.
GEN_BUFFER_RGBA(m_ColorTex1, m_Width, m_Height);
GEN_BUFFER_RGBA(m_ColorTex2, m_Width, m_Height);
// Extra targets at the unscaled size. They are only created when m_Width
// differs from m_UnscaledWidth and the device supports compute shaders;
// STORAGE usage lets the compute passes write to them.
+ if (m_UnscaledWidth != m_Width && m_Device->GetCapabilities().computeShaders)
+ {
+ const uint32_t usage =
+ Renderer::Backend::ITexture::Usage::TRANSFER_SRC |
+ Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT |
+ Renderer::Backend::ITexture::Usage::SAMPLED |
+ Renderer::Backend::ITexture::Usage::STORAGE;
+ m_UnscaledTexture1 = m_Device->CreateTexture2D(
+ "PostProcUnscaledTexture1", usage,
+ Renderer::Backend::Format::R8G8B8A8_UNORM,
+ m_UnscaledWidth, m_UnscaledHeight,
+ Renderer::Backend::Sampler::MakeDefaultSampler(
+ Renderer::Backend::Sampler::Filter::LINEAR,
+ Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
+
+ m_UnscaledTexture2 = m_Device->CreateTexture2D(
+ "PostProcUnscaledTexture2", usage,
+ Renderer::Backend::Format::R8G8B8A8_UNORM, m_UnscaledWidth, m_UnscaledHeight,
+ Renderer::Backend::Sampler::MakeDefaultSampler(
+ Renderer::Backend::Sampler::Filter::LINEAR,
+ Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
+
+ Renderer::Backend::SColorAttachment colorAttachment{};
+ colorAttachment.clearColor = CColor{0.0f, 0.0f, 0.0f, 0.0f};
+ colorAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::LOAD;
+ colorAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
+
+ colorAttachment.texture = m_UnscaledTexture1.get();
+ m_UnscaledFramebuffer1 = m_Device->CreateFramebuffer("PostprocUnscaledFramebuffer1",
+ &colorAttachment, nullptr);
+
+ colorAttachment.texture = m_UnscaledTexture2.get();
+ m_UnscaledFramebuffer2 = m_Device->CreateFramebuffer("PostprocUnscaledFramebuffer2",
+ &colorAttachment, nullptr);
+ }
+
// Textures for several blur sizes. It would be possible to reuse
// m_BlurTex2b, thus avoiding the need for m_BlurTex4b and m_BlurTex8b, though given
// that these are fairly small it's probably not worth complicating the coordinates passed
// to the blur helper functions.
// Each blur scale is half the size of the previous one, never below 1x1.
uint32_t width = m_Width / 2, height = m_Height / 2;
for (BlurScale& scale : m_BlurScales)
{
for (BlurScale::Step& step : scale.steps)
{
GEN_BUFFER_RGBA(step.texture, width, height);
Renderer::Backend::SColorAttachment colorAttachment{};
colorAttachment.texture = step.texture.get();
colorAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::LOAD;
colorAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
colorAttachment.clearColor = CColor{0.0f, 0.0f, 0.0f, 0.0f};
step.framebuffer = m_Device->CreateFramebuffer(
"BlurScaleStepFramebuffer", &colorAttachment, nullptr);
}
width = std::max(1u, width / 2);
height = std::max(1u, height / 2);
}
#undef GEN_BUFFER_RGBA
// Allocate the Depth/Stencil texture.
m_DepthTex = m_Device->CreateTexture2D("PostProcDepthTexture",
Renderer::Backend::ITexture::Usage::SAMPLED |
Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT,
m_Device->GetPreferredDepthStencilFormat(
Renderer::Backend::ITexture::Usage::SAMPLED |
Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT,
true, true),
m_Width, m_Height,
Renderer::Backend::Sampler::MakeDefaultSampler(
Renderer::Backend::Sampler::Filter::LINEAR,
Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE));
// Set up the framebuffers with some initial textures.
// The capture framebuffer renders into m_ColorTex1 with the depth texture
// attached; the colour load op is DONT_CARE and the depth load op is CLEAR.
Renderer::Backend::SColorAttachment colorAttachment{};
colorAttachment.texture = m_ColorTex1.get();
colorAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::DONT_CARE;
colorAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
colorAttachment.clearColor = CColor{0.0f, 0.0f, 0.0f, 0.0f};
Renderer::Backend::SDepthStencilAttachment depthStencilAttachment{};
depthStencilAttachment.texture = m_DepthTex.get();
depthStencilAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::CLEAR;
depthStencilAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
m_CaptureFramebuffer = m_Device->CreateFramebuffer("PostprocCaptureFramebuffer",
&colorAttachment, &depthStencilAttachment);
// Ping and pong framebuffers: colour only (m_ColorTex1 / m_ColorTex2), with
// the previous contents loaded.
colorAttachment.texture = m_ColorTex1.get();
colorAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::LOAD;
colorAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
m_PingFramebuffer = m_Device->CreateFramebuffer("PostprocPingFramebuffer",
&colorAttachment, nullptr);
colorAttachment.texture = m_ColorTex2.get();
m_PongFramebuffer = m_Device->CreateFramebuffer("PostprocPongFramebuffer",
&colorAttachment, nullptr);
if (!m_CaptureFramebuffer || !m_PingFramebuffer || !m_PongFramebuffer)
{
LOGWARNING("Failed to create postproc framebuffers");
g_RenderingOptions.SetPostProc(false);
}
// The multisample buffer depends on the size as well, so rebuild it too.
if (m_UsingMultisampleBuffer)
{
DestroyMultisampleBuffer();
CreateMultisampleBuffer();
}
}
/**
 * Draws inTex into the given framebuffer through a viewport of half the
 * input size, using the bloom effect with the BLOOM_NOP define.
 */
void CPostprocManager::ApplyBlurDownscale2x(
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
	Renderer::Backend::IFramebuffer* framebuffer,
	Renderer::Backend::ITexture* inTex, int inWidth, int inHeight)
{
	deviceCommandContext->BeginFramebufferPass(framebuffer);

	Renderer::Backend::IDeviceCommandContext::Rect halfSizeViewport{};
	halfSizeViewport.width = inWidth / 2;
	halfSizeViewport.height = inHeight / 2;
	deviceCommandContext->SetViewports(1, &halfSizeViewport);

	// Get bloom shader with instructions to simply copy texels.
	CShaderDefines copyDefines;
	copyDefines.Add(str_BLOOM_NOP, str_1);
	const CShaderTechniquePtr copyTechnique =
		g_Renderer.GetShaderManager().LoadEffect(str_bloom, copyDefines);

	deviceCommandContext->SetGraphicsPipelineState(
		copyTechnique->GetGraphicsPipelineState());
	deviceCommandContext->BeginPass();

	Renderer::Backend::IShaderProgram* const copyShader = copyTechnique->GetShader();
	deviceCommandContext->SetTexture(
		copyShader->GetBindingSlot(str_renderedTex), inTex);

	DrawFullscreenQuad(m_VertexInputLayout, deviceCommandContext);

	deviceCommandContext->EndPass();
	deviceCommandContext->EndFramebufferPass();
}
/**
 * Two-pass Gaussian blur using the bloom effect: a horizontal pass from inTex
 * into tempFramebuffer, then a vertical pass from tempTex into
 * outFramebuffer. Both passes use a viewport of inWidth x inHeight.
 */
void CPostprocManager::ApplyBlurGauss(
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
	Renderer::Backend::ITexture* inTex,
	Renderer::Backend::ITexture* tempTex,
	Renderer::Backend::IFramebuffer* tempFramebuffer,
	Renderer::Backend::IFramebuffer* outFramebuffer,
	int inWidth, int inHeight)
{
	Renderer::Backend::IDeviceCommandContext::Rect blurViewport{};
	blurViewport.width = inWidth;
	blurViewport.height = inHeight;

	// One blur pass: selects the bloom variant by its define, binds the input
	// texture and its size, and draws a fullscreen quad into the target.
	const auto runBlurPass = [&](
		Renderer::Backend::IFramebuffer* target,
		const CStrIntern& directionDefine,
		Renderer::Backend::ITexture* input)
	{
		deviceCommandContext->BeginFramebufferPass(target);
		deviceCommandContext->SetViewports(1, &blurViewport);

		CShaderDefines passDefines;
		passDefines.Add(directionDefine, str_1);
		const CShaderTechniquePtr bloomTechnique =
			g_Renderer.GetShaderManager().LoadEffect(str_bloom, passDefines);

		deviceCommandContext->SetGraphicsPipelineState(
			bloomTechnique->GetGraphicsPipelineState());
		deviceCommandContext->BeginPass();

		Renderer::Backend::IShaderProgram* const bloomShader = bloomTechnique->GetShader();
		deviceCommandContext->SetTexture(
			bloomShader->GetBindingSlot(str_renderedTex), input);
		deviceCommandContext->SetUniform(
			bloomShader->GetBindingSlot(str_texSize), inWidth, inHeight);

		DrawFullscreenQuad(m_VertexInputLayout, deviceCommandContext);

		deviceCommandContext->EndPass();
		deviceCommandContext->EndFramebufferPass();
	};

	// Horizontal Gaussian blur pass.
	runBlurPass(tempFramebuffer, str_BLOOM_PASS_H, inTex);

	// Vertical Gaussian blur pass; its input is the output of the horizontal pass.
	runBlurPass(outFramebuffer, str_BLOOM_PASS_V, tempTex);
}
/**
 * Fills every blur scale: the current colour texture is drawn into step 0 of
 * the scale through a half-size viewport, then blurred with step 1 as the
 * temporary target. The size passed on is halved after each downscale.
 */
void CPostprocManager::ApplyBlur(
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
	// The same source texture is used for every scale.
	Renderer::Backend::ITexture* const sourceTexture =
		m_WhichBuffer ? m_ColorTex1.get() : m_ColorTex2.get();

	uint32_t levelWidth = m_Width;
	uint32_t levelHeight = m_Height;

	for (BlurScale& scale : m_BlurScales)
	{
		BlurScale::Step& resultStep = scale.steps[0];
		BlurScale::Step& scratchStep = scale.steps[1];

		ApplyBlurDownscale2x(
			deviceCommandContext, resultStep.framebuffer.get(),
			sourceTexture, levelWidth, levelHeight);

		levelWidth = levelWidth / 2;
		levelHeight = levelHeight / 2;

		ApplyBlurGauss(
			deviceCommandContext, resultStep.texture.get(),
			scratchStep.texture.get(), scratchStep.framebuffer.get(),
			resultStep.framebuffer.get(), levelWidth, levelHeight);
	}
}
// Resets the ping-pong selector (m_WhichBuffer = true) and returns the output
// framebuffer: the multisample framebuffer when a multisample buffer is in
// use, otherwise the capture framebuffer. Requires an initialized manager.
Renderer::Backend::IFramebuffer* CPostprocManager::PrepareAndGetOutputFramebuffer()
{
ENSURE(m_IsInitialized);
- // Leaves m_PingFbo selected for rendering; m_WhichBuffer stays true at this point.
+ // Leaves m_PingFramebuffer selected for rendering; m_WhichBuffer stays true at this point.
m_WhichBuffer = true;
return m_UsingMultisampleBuffer ? m_MultisampleFramebuffer.get() : m_CaptureFramebuffer.get();
}
// Runs the compute pipeline of shaderTechnique in one compute pass: source is
// bound as inTex, destination as the storage texture outTex, and screenSize
// holds (m_Width, m_Height, m_UnscaledWidth, m_UnscaledHeight).
// The dispatch covers the unscaled size in 16x16 regions, rounded up.
+void CPostprocManager::UpscaleTextureByCompute(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::ITexture* destination)
+{
+ Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
+
+ const std::array screenSize{{
+ static_cast(m_Width), static_cast(m_Height),
+ static_cast(m_UnscaledWidth), static_cast(m_UnscaledHeight)}};
+
// NOTE(review): 16 presumably has to match the region handled by one thread
// group in the compute shader - confirm against the shader source.
+ constexpr uint32_t threadGroupWorkRegionDim = 16;
+ const uint32_t dispatchGroupCountX = DivideRoundUp(m_UnscaledWidth, threadGroupWorkRegionDim);
+ const uint32_t dispatchGroupCountY = DivideRoundUp(m_UnscaledHeight, threadGroupWorkRegionDim);
+
+ deviceCommandContext->BeginComputePass();
+ deviceCommandContext->SetComputePipelineState(
+ shaderTechnique->GetComputePipelineState());
+ deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
+ deviceCommandContext->SetTexture(shaderProgram->GetBindingSlot(str_inTex), source);
+ deviceCommandContext->SetStorageTexture(shaderProgram->GetBindingSlot(str_outTex), destination);
+ deviceCommandContext->Dispatch(dispatchGroupCountX, dispatchGroupCountY, 1);
+ deviceCommandContext->EndComputePass();
+}
+
// Graphics-pipeline variant of the upscale: draws a fullscreen quad into the
// destination framebuffer (viewport = full destination size) with source
// bound as inTex and screenSize holding
// (m_Width, m_Height, m_UnscaledWidth, m_UnscaledHeight).
+void CPostprocManager::UpscaleTextureByFullscreenQuad(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::IFramebuffer* destination)
+{
+ Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
+
+ const std::array screenSize{{
+ static_cast(m_Width), static_cast(m_Height),
+ static_cast(m_UnscaledWidth), static_cast(m_UnscaledHeight)}};
+
+ deviceCommandContext->BeginFramebufferPass(destination);
+
+ Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
+ viewportRect.width = destination->GetWidth();
+ viewportRect.height = destination->GetHeight();
+ deviceCommandContext->SetViewports(1, &viewportRect);
+
+ deviceCommandContext->SetGraphicsPipelineState(
+ shaderTechnique->GetGraphicsPipelineState());
+ deviceCommandContext->BeginPass();
+
+ deviceCommandContext->SetTexture(
+ shaderProgram->GetBindingSlot(str_inTex), source);
+ deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
+
+ DrawFullscreenQuad(m_VertexInputLayout, deviceCommandContext);
+
+ deviceCommandContext->EndPass();
+ deviceCommandContext->EndFramebufferPass();
+}
+
// Runs the sharpening compute technique after scaling: same bindings and
// dispatch size as the compute upscale (inTex = source, outTex = destination,
// 16x16 regions over the unscaled size), plus a fixed sharpness uniform.
+void CPostprocManager::ApplySharpnessAfterScale(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::ITexture* destination)
+{
+ Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
+
+ // Recommended sharpness for RCAS.
+ constexpr float sharpness = 0.2f;
+
+ const std::array screenSize{ {
+ static_cast(m_Width), static_cast(m_Height),
+ static_cast(m_UnscaledWidth), static_cast(m_UnscaledHeight)} };
+
+ constexpr uint32_t threadGroupWorkRegionDim = 16;
+ const uint32_t dispatchGroupCountX = DivideRoundUp(m_UnscaledWidth, threadGroupWorkRegionDim);
+ const uint32_t dispatchGroupCountY = DivideRoundUp(m_UnscaledHeight, threadGroupWorkRegionDim);
+
+ deviceCommandContext->BeginComputePass();
+ deviceCommandContext->SetComputePipelineState(
+ shaderTechnique->GetComputePipelineState());
+ deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_sharpness), sharpness);
+ deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
+ deviceCommandContext->SetTexture(shaderProgram->GetBindingSlot(str_inTex), source);
+ deviceCommandContext->SetStorageTexture(
+ shaderProgram->GetBindingSlot(str_outTex), destination);
+ deviceCommandContext->Dispatch(dispatchGroupCountX, dispatchGroupCountY, 1);
+ deviceCommandContext->EndComputePass();
+}
+
// Runs the compute downscale technique in one compute pass: source is bound
// as inTex, destination as the storage texture outTex, and screenSize holds
// (m_Width, m_Height, m_UnscaledWidth, m_UnscaledHeight).
// Unlike the upscale passes, the dispatch uses 8x8 regions over the unscaled
// size, rounded up.
+void CPostprocManager::DownscaleTextureByCompute(
+ Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
+ CShaderTechnique* shaderTechnique,
+ Renderer::Backend::ITexture* source,
+ Renderer::Backend::ITexture* destination)
+{
+ Renderer::Backend::IShaderProgram* shaderProgram = shaderTechnique->GetShader();
+
+ const std::array screenSize{{
+ static_cast(m_Width), static_cast(m_Height),
+ static_cast(m_UnscaledWidth), static_cast(m_UnscaledHeight)}};
+
+ constexpr uint32_t threadGroupWorkRegionDim = 8;
+ const uint32_t dispatchGroupCountX = DivideRoundUp(m_UnscaledWidth, threadGroupWorkRegionDim);
+ const uint32_t dispatchGroupCountY = DivideRoundUp(m_UnscaledHeight, threadGroupWorkRegionDim);
+
+ deviceCommandContext->BeginComputePass();
+ deviceCommandContext->SetComputePipelineState(
+ shaderTechnique->GetComputePipelineState());
+ deviceCommandContext->SetUniform(shaderProgram->GetBindingSlot(str_screenSize), screenSize);
+ deviceCommandContext->SetTexture(shaderProgram->GetBindingSlot(str_inTex), source);
+ deviceCommandContext->SetStorageTexture(shaderProgram->GetBindingSlot(str_outTex), destination);
+ deviceCommandContext->Dispatch(dispatchGroupCountX, dispatchGroupCountY, 1);
+ deviceCommandContext->EndComputePass();
+}
+
// Copies the current postprocessing result into the destination framebuffer.
// Three cases:
//  - ShouldUpscale(): upscale by compute (optionally followed by the RCAS
//    sharpening pass) and blit, or upscale with a fullscreen quad drawn
//    directly into destination when no compute upscale technique is set;
//  - ShouldDownscale(): downscale by compute and blit with NEAREST filtering,
//    or blit the current ping/pong framebuffer with LINEAR filtering;
//  - otherwise: a plain NEAREST blit of the overlapping region.
void CPostprocManager::BlitOutputFramebuffer(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
Renderer::Backend::IFramebuffer* destination)
{
ENSURE(m_IsInitialized);
GPU_SCOPED_LABEL(deviceCommandContext, "Copy postproc to backbuffer");
- Renderer::Backend::IFramebuffer* source =
- (m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get();
// Colour texture holding the latest result of the ping-pong chain.
+ Renderer::Backend::ITexture* previousTexture =
+ (m_WhichBuffer ? m_ColorTex1 : m_ColorTex2).get();
- // We blit to the backbuffer from the previous active buffer.
- // We'll have upscaling/downscaling separately.
- Renderer::Backend::IDeviceCommandContext::Rect region{};
- region.width = std::min(source->GetWidth(), destination->GetWidth());
- region.height = std::min(source->GetHeight(), destination->GetHeight());
- deviceCommandContext->BlitFramebuffer(
- source, destination, region, region,
- Renderer::Backend::Sampler::Filter::NEAREST);
+ if (ShouldUpscale())
+ {
+ if (m_UpscaleComputeTech)
+ {
// The final image always ends up in m_UnscaledTexture2: either the upscale
// writes to it directly, or it writes to m_UnscaledTexture1 and the RCAS
// pass sharpens from texture 1 into texture 2.
+ Renderer::Backend::ITexture* unscaledTexture = m_RCASComputeTech ? m_UnscaledTexture1.get() : m_UnscaledTexture2.get();
+ UpscaleTextureByCompute(deviceCommandContext, m_UpscaleComputeTech.get(), previousTexture, unscaledTexture);
+ if (m_RCASComputeTech)
+ ApplySharpnessAfterScale(deviceCommandContext, m_RCASComputeTech.get(), m_UnscaledTexture1.get(), m_UnscaledTexture2.get());
+
+ Renderer::Backend::IDeviceCommandContext::Rect sourceRegion{}, destinationRegion{};
+ sourceRegion.width = m_UnscaledTexture2->GetWidth();
+ sourceRegion.height = m_UnscaledTexture2->GetHeight();
+ destinationRegion.width = destination->GetWidth();
+ destinationRegion.height = destination->GetHeight();
+ deviceCommandContext->BlitFramebuffer(
+ m_UnscaledFramebuffer2.get(), destination, sourceRegion, destinationRegion,
+ Renderer::Backend::Sampler::Filter::NEAREST);
+ }
+ else
+ {
+ UpscaleTextureByFullscreenQuad(deviceCommandContext, m_UpscaleTech.get(), previousTexture, destination);
+ }
+ }
+ else if (ShouldDownscale())
+ {
+ Renderer::Backend::IDeviceCommandContext::Rect sourceRegion{};
+ Renderer::Backend::Sampler::Filter samplerFilter{
+ Renderer::Backend::Sampler::Filter::NEAREST};
+ Renderer::Backend::IFramebuffer* source{nullptr};
+
+ if (m_DownscaleComputeTech)
+ {
// The compute pass writes into m_UnscaledTexture1; the blit below then
// reads it through m_UnscaledFramebuffer1.
+ DownscaleTextureByCompute(deviceCommandContext, m_DownscaleComputeTech.get(), previousTexture, m_UnscaledTexture1.get());
+
+ source = m_UnscaledFramebuffer1.get();
+ sourceRegion.width = m_UnscaledTexture1->GetWidth();
+ sourceRegion.height = m_UnscaledTexture1->GetHeight();
+ }
+ else
+ {
// Without a compute technique the blit itself does the downscale, so the
// filter is switched to LINEAR.
+ source = (m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get();
+ sourceRegion.width = source->GetWidth();
+ sourceRegion.height = source->GetHeight();
+ samplerFilter = Renderer::Backend::Sampler::Filter::LINEAR;
+ }
+
+ Renderer::Backend::IDeviceCommandContext::Rect destinationRegion{};
+ destinationRegion.width = destination->GetWidth();
+ destinationRegion.height = destination->GetHeight();
+ deviceCommandContext->BlitFramebuffer(
+ source, destination, sourceRegion, destinationRegion, samplerFilter);
+ }
+ else
+ {
+ Renderer::Backend::IFramebuffer* source =
+ (m_WhichBuffer ? m_PingFramebuffer : m_PongFramebuffer).get();
+
+ // We blit to the backbuffer from the previous active buffer.
+ Renderer::Backend::IDeviceCommandContext::Rect region{};
+ region.width = std::min(source->GetWidth(), destination->GetWidth());
+ region.height = std::min(source->GetHeight(), destination->GetHeight());
+ deviceCommandContext->BlitFramebuffer(
+ source, destination, region, region,
+ Renderer::Backend::Sampler::Filter::NEAREST);
+ }
}
// Runs one pass of shaderTech as a fullscreen quad: reads from the current
// colour texture, writes to the other ping-pong framebuffer and finally flips
// m_WhichBuffer so the freshly written buffer becomes the current one.
void CPostprocManager::ApplyEffect(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
const CShaderTechniquePtr& shaderTech, int pass)
{
- // select the other FBO for rendering
+ // Select the other framebuffer for rendering.
Renderer::Backend::IFramebuffer* framebuffer =
(m_WhichBuffer ? m_PongFramebuffer : m_PingFramebuffer).get();
deviceCommandContext->BeginFramebufferPass(framebuffer);
// The viewport covers the whole target framebuffer.
Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
viewportRect.width = framebuffer->GetWidth();
viewportRect.height = framebuffer->GetHeight();
deviceCommandContext->SetViewports(1, &viewportRect);
deviceCommandContext->SetGraphicsPipelineState(
shaderTech->GetGraphicsPipelineState(pass));
deviceCommandContext->BeginPass();
Renderer::Backend::IShaderProgram* shader = shaderTech->GetShader(pass);
- // Use the textures from the current FBO as input to the shader.
+ // Use the textures from the current framebuffer as input to the shader.
// We also bind a bunch of other textures and parameters, but since
// this only happens once per frame the overhead is negligible.
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_renderedTex),
m_WhichBuffer ? m_ColorTex1.get() : m_ColorTex2.get());
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_depthTex), m_DepthTex.get());
// Step 0 of each of the three blur scales holds that scale's blurred image.
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_blurTex2), m_BlurScales[0].steps[0].texture.get());
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_blurTex4), m_BlurScales[1].steps[0].texture.get());
deviceCommandContext->SetTexture(
shader->GetBindingSlot(str_blurTex8), m_BlurScales[2].steps[0].texture.get());
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_width), m_Width);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_height), m_Height);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_zNear), m_NearPlane);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_zFar), m_FarPlane);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_sharpness), m_Sharpness);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_brightness), g_LightEnv.m_Brightness);
// Note that the "hdr" uniform is fed with the light environment's contrast.
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_hdr), g_LightEnv.m_Contrast);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_saturation), g_LightEnv.m_Saturation);
deviceCommandContext->SetUniform(shader->GetBindingSlot(str_bloom), g_LightEnv.m_Bloom);
DrawFullscreenQuad(m_VertexInputLayout, deviceCommandContext);
deviceCommandContext->EndPass();
deviceCommandContext->EndFramebufferPass();
m_WhichBuffer = !m_WhichBuffer;
}
// Applies the enabled post-processing stages, each as one ApplyEffect call per
// technique pass: the selected effect (preceded by the blur chain), then
// anti-aliasing, then sharpening. Anti-aliasing and sharpening are never
// applied on the GL_ARB backend, and the sharpening stage is additionally
// skipped whenever ShouldUpscale() returns true.
// Requires the manager to be initialized (enforced with ENSURE).
void CPostprocManager::ApplyPostproc(
Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
ENSURE(m_IsInitialized);
// Don't do anything if we are using the default effect, no AA and no sharpening.
const bool hasEffects = m_PostProcEffect != L"default";
const bool hasARB = m_Device->GetBackend() == Renderer::Backend::Backend::GL_ARB;
const bool hasAA = m_AATech && !hasARB;
const bool hasSharp = m_SharpTech && !hasARB;
if (!hasEffects && !hasAA && !hasSharp)
return;
GPU_SCOPED_LABEL(deviceCommandContext, "Render postproc");
if (hasEffects)
{
// First render blur textures. Note that this happens ONLY ONCE, before any effects are applied!
// (This may need to change depending on future usage, however that will have a fps hit)
ApplyBlur(deviceCommandContext);
for (int pass = 0; pass < m_PostProcTech->GetNumPasses(); ++pass)
ApplyEffect(deviceCommandContext, m_PostProcTech, pass);
}
if (hasAA)
{
for (int pass = 0; pass < m_AATech->GetNumPasses(); ++pass)
ApplyEffect(deviceCommandContext, m_AATech, pass);
}
- if (hasSharp)
+ if (hasSharp && !ShouldUpscale())
{
for (int pass = 0; pass < m_SharpTech->GetNumPasses(); ++pass)
ApplyEffect(deviceCommandContext, m_SharpTech, pass);
}
}
// Generate list of available effect-sets
std::vector CPostprocManager::GetPostEffects()
{
std::vector effects;
const VfsPath folder(L"shaders/effects/postproc/");
VfsPaths pathnames;
if (vfs::GetPathnames(g_VFS, folder, 0, pathnames) < 0)
LOGERROR("Error finding Post effects in '%s'", folder.string8());
for (const VfsPath& path : pathnames)
if (path.Extension() == L".xml")
effects.push_back(path.Basename().string());
// Add the default "null" effect to the list.
effects.push_back(L"default");
sort(effects.begin(), effects.end());
return effects;
}
void CPostprocManager::SetPostEffect(const CStrW& name)
{
if (m_IsInitialized)
{
if (name != L"default")
{
CStrW n = L"postproc/" + name;
m_PostProcTech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern(n.ToUTF8()));
}
}
m_PostProcEffect = name;
}
void CPostprocManager::UpdateAntiAliasingTechnique()
{
if (m_Device->GetBackend() == Renderer::Backend::Backend::GL_ARB || !m_IsInitialized)
return;
CStr newAAName;
CFG_GET_VAL("antialiasing", newAAName);
if (m_AAName == newAAName)
return;
m_AAName = newAAName;
m_AATech.reset();
if (m_UsingMultisampleBuffer)
{
m_UsingMultisampleBuffer = false;
DestroyMultisampleBuffer();
}
// We have to hardcode names in the engine, because anti-aliasing
// techinques strongly depend on the graphics pipeline.
// We might use enums in future though.
constexpr std::string_view msaaPrefix{"msaa"};
if (m_AAName == str_fxaa.string())
{
m_AATech = g_Renderer.GetShaderManager().LoadEffect(str_fxaa);
}
else if (m_AAName.size() > msaaPrefix.size() &&
std::string_view{m_AAName}.substr(0, msaaPrefix.size()) == msaaPrefix)
{
// We don't want to enable MSAA in Atlas, because it uses wxWidgets and its canvas.
if (g_AtlasGameLoop && g_AtlasGameLoop->running)
return;
if (!m_Device->GetCapabilities().multisampling || m_AllowedSampleCounts.empty())
{
LOGWARNING("MSAA is unsupported.");
return;
}
std::stringstream ss(m_AAName.substr(msaaPrefix.size()));
ss >> m_MultisampleCount;
if (std::find(std::begin(m_AllowedSampleCounts), std::end(m_AllowedSampleCounts), m_MultisampleCount) ==
std::end(m_AllowedSampleCounts))
{
m_MultisampleCount = std::min(4u, m_Device->GetCapabilities().maxSampleCount);
LOGWARNING("Wrong MSAA sample count: %s.", m_AAName.EscapeToPrintableASCII().c_str());
}
m_UsingMultisampleBuffer = true;
CreateMultisampleBuffer();
}
}
void CPostprocManager::UpdateSharpeningTechnique()
{
if (m_Device->GetBackend() == Renderer::Backend::Backend::GL_ARB || !m_IsInitialized)
return;
CStr newSharpName;
CFG_GET_VAL("sharpening", newSharpName);
if (m_SharpName == newSharpName)
return;
m_SharpName = newSharpName;
m_SharpTech.reset();
if (m_SharpName == "cas")
{
m_SharpTech = g_Renderer.GetShaderManager().LoadEffect(CStrIntern(m_SharpName));
}
}
// Refreshes m_Sharpness from the "sharpness" config value. The value is passed
// to the shaders as the "sharpness" uniform in ApplyEffect().
void CPostprocManager::UpdateSharpnessFactor()
{
CFG_GET_VAL("sharpness", m_Sharpness);
}
// Selects the technique(s) used for upscaling, dropping any previously loaded
// ones first:
//  - "fsr" loads the FSR upscale and RCAS compute techniques, but only when the
//    device reports the computeShaders capability;
//  - "pixelated" loads the nearest-neighbour upscale technique;
//  - anything else, including "fsr" without compute shader support, loads the
//    bilinear upscale technique.
+void CPostprocManager::SetUpscaleTechnique(const CStr& upscaleName)
+{
+ m_UpscaleTech.reset();
+ m_UpscaleComputeTech.reset();
+ m_RCASComputeTech.reset();
+ if (m_Device->GetCapabilities().computeShaders && upscaleName == "fsr")
+ {
+ m_UpscaleComputeTech = g_Renderer.GetShaderManager().LoadEffect(str_compute_upscale_fsr);
+ m_RCASComputeTech = g_Renderer.GetShaderManager().LoadEffect(str_compute_rcas);
+ }
+ else if (upscaleName == "pixelated")
+ {
+ m_UpscaleTech = g_Renderer.GetShaderManager().LoadEffect(str_upscale_nearest);
+ }
+ else
+ {
+ m_UpscaleTech = g_Renderer.GetShaderManager().LoadEffect(str_upscale_bilinear);
+ }
+}
+
// Stores the near and far clip plane distances. They are passed to the
// post-processing shaders as the "zNear"/"zFar" uniforms in ApplyEffect().
void CPostprocManager::SetDepthBufferClipPlanes(float nearPlane, float farPlane)
{
m_NearPlane = nearPlane;
m_FarPlane = farPlane;
}
void CPostprocManager::CreateMultisampleBuffer()
{
m_MultisampleColorTex = m_Device->CreateTexture("PostProcColorMS",
Renderer::Backend::ITexture::Type::TEXTURE_2D_MULTISAMPLE,
Renderer::Backend::ITexture::Usage::COLOR_ATTACHMENT |
Renderer::Backend::ITexture::Usage::TRANSFER_SRC,
Renderer::Backend::Format::R8G8B8A8_UNORM, m_Width, m_Height,
Renderer::Backend::Sampler::MakeDefaultSampler(
Renderer::Backend::Sampler::Filter::LINEAR,
Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE), 1, m_MultisampleCount);
// Allocate the Depth/Stencil texture.
m_MultisampleDepthTex = m_Device->CreateTexture("PostProcDepthMS",
Renderer::Backend::ITexture::Type::TEXTURE_2D_MULTISAMPLE,
Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT |
Renderer::Backend::ITexture::Usage::TRANSFER_SRC,
m_Device->GetPreferredDepthStencilFormat(
Renderer::Backend::ITexture::Usage::DEPTH_STENCIL_ATTACHMENT |
Renderer::Backend::ITexture::Usage::TRANSFER_SRC,
true, true),
m_Width, m_Height,
Renderer::Backend::Sampler::MakeDefaultSampler(
Renderer::Backend::Sampler::Filter::LINEAR,
Renderer::Backend::Sampler::AddressMode::CLAMP_TO_EDGE), 1, m_MultisampleCount);
// Set up the framebuffers with some initial textures.
Renderer::Backend::SColorAttachment colorAttachment{};
colorAttachment.texture = m_MultisampleColorTex.get();
colorAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::DONT_CARE;
colorAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
colorAttachment.clearColor = CColor{0.0f, 0.0f, 0.0f, 0.0f};
Renderer::Backend::SDepthStencilAttachment depthStencilAttachment{};
depthStencilAttachment.texture = m_MultisampleDepthTex.get();
depthStencilAttachment.loadOp = Renderer::Backend::AttachmentLoadOp::CLEAR;
depthStencilAttachment.storeOp = Renderer::Backend::AttachmentStoreOp::STORE;
m_MultisampleFramebuffer = m_Device->CreateFramebuffer(
"PostprocMultisampleFramebuffer", &colorAttachment, &depthStencilAttachment);
if (!m_MultisampleFramebuffer)
{
LOGERROR("Failed to create postproc multisample framebuffer");
m_UsingMultisampleBuffer = false;
DestroyMultisampleBuffer();
}
}
// Releases the multisample framebuffer and its two textures.
// Nothing is released while m_UsingMultisampleBuffer is still set, so callers
// clear that flag before calling this function.
void CPostprocManager::DestroyMultisampleBuffer()
{
	if (!m_UsingMultisampleBuffer)
	{
		m_MultisampleFramebuffer.reset();
		m_MultisampleColorTex.reset();
		m_MultisampleDepthTex.reset();
	}
}
// Returns whether the multisample buffer is currently in use
// (the value of m_UsingMultisampleBuffer).
bool CPostprocManager::IsMultisampleEnabled() const
{
return m_UsingMultisampleBuffer;
}
// Resolves the multisample framebuffer into the ping framebuffer.
// Does nothing when the multisample buffer is not in use.
void CPostprocManager::ResolveMultisampleFramebuffer(
	Renderer::Backend::IDeviceCommandContext* deviceCommandContext)
{
	if (m_UsingMultisampleBuffer)
	{
		GPU_SCOPED_LABEL(deviceCommandContext, "Resolve postproc multisample");
		deviceCommandContext->ResolveFramebuffer(
			m_MultisampleFramebuffer.get(), m_PingFramebuffer.get());
	}
}
+
// Recomputes the internal render size from the given (unscaled) size and the
// "renderer.scale" config value.
// A scale outside [0.25, 2.0] is reported with a warning and replaced by 1.0.
// The unscaled size is stored as-is; m_Width and m_Height receive the scaled
// size, converted back to integers.
// On the GL_ARB backend only m_Scale is reset to 1.0.
// NOTE(review): that early return leaves m_UnscaledWidth, m_UnscaledHeight,
// m_Width and m_Height untouched - confirm that this is intended.
+void CPostprocManager::RecalculateSize(const uint32_t width, const uint32_t height)
+{
+ if (m_Device->GetBackend() == Renderer::Backend::Backend::GL_ARB)
+ {
+ m_Scale = 1.0f;
+ return;
+ }
+ CFG_GET_VAL("renderer.scale", m_Scale);
+ if (m_Scale < 0.25f || m_Scale > 2.0f)
+ {
+ LOGWARNING("Invalid renderer scale: %0.2f", m_Scale);
+ m_Scale = 1.0f;
+ }
+ m_UnscaledWidth = width;
+ m_UnscaledHeight = height;
+ m_Width = m_UnscaledWidth * m_Scale;
+ m_Height = m_UnscaledHeight * m_Scale;
+}
+
// Returns true when the internal render width (m_Width) is smaller than the
// unscaled width (m_UnscaledWidth). Only the widths are compared.
+bool CPostprocManager::ShouldUpscale() const
+{
+ return m_Width < m_UnscaledWidth;
+}
+
// Returns true when the internal render width (m_Width) is larger than the
// unscaled width (m_UnscaledWidth). Only the widths are compared.
+bool CPostprocManager::ShouldDownscale() const
+{
+ return m_Width > m_UnscaledWidth;
+}
Index: ps/trunk/source/renderer/Renderer.cpp
===================================================================
--- ps/trunk/source/renderer/Renderer.cpp (revision 28009)
+++ ps/trunk/source/renderer/Renderer.cpp (revision 28010)
@@ -1,912 +1,914 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#include "precompiled.h"
#include "Renderer.h"
#include "graphics/Canvas2D.h"
#include "graphics/CinemaManager.h"
#include "graphics/GameView.h"
#include "graphics/LightEnv.h"
#include "graphics/ModelDef.h"
#include "graphics/TerrainTextureManager.h"
#include "i18n/L10n.h"
#include "lib/allocators/shared_ptr.h"
#include "lib/hash.h"
#include "lib/tex/tex.h"
#include "gui/GUIManager.h"
#include "ps/CConsole.h"
#include "ps/CLogger.h"
#include "ps/ConfigDB.h"
#include "ps/CStrInternStatic.h"
#include "ps/Game.h"
#include "ps/GameSetup/Config.h"
#include "ps/GameSetup/GameSetup.h"
#include "ps/Globals.h"
#include "ps/Loader.h"
#include "ps/Profile.h"
#include "ps/Filesystem.h"
#include "ps/World.h"
#include "ps/ProfileViewer.h"
#include "graphics/Camera.h"
#include "graphics/FontManager.h"
#include "graphics/ShaderManager.h"
#include "graphics/Terrain.h"
#include "graphics/Texture.h"
#include "graphics/TextureManager.h"
#include "ps/Util.h"
#include "ps/VideoMode.h"
#include "renderer/backend/IDevice.h"
#include "renderer/DebugRenderer.h"
#include "renderer/PostprocManager.h"
#include "renderer/RenderingOptions.h"
#include "renderer/RenderModifiers.h"
#include "renderer/SceneRenderer.h"
#include "renderer/TimeManager.h"
#include "renderer/VertexBufferManager.h"
#include "tools/atlas/GameInterface/GameLoop.h"
#include "tools/atlas/GameInterface/View.h"
#include
namespace
{
size_t g_NextScreenShotNumber = 0;
///////////////////////////////////////////////////////////////////////////////////
// CRendererStatsTable - Profile display of rendering stats
/**
* Class CRendererStatsTable: Implementation of AbstractProfileTable to
* display the renderer stats in-game.
*
* Accesses CRenderer::m_Stats by keeping the reference passed to the
* constructor.
*/
class CRendererStatsTable : public AbstractProfileTable
{
NONCOPYABLE(CRendererStatsTable);
public:
CRendererStatsTable(const CRenderer::Stats& st);
// Implementation of AbstractProfileTable interface
CStr GetName() override;
CStr GetTitle() override;
size_t GetNumberRows() override;
const std::vector& GetColumns() override;
CStr GetCellText(size_t row, size_t col) override;
AbstractProfileTable* GetChild(size_t row) override;
private:
/// Reference to the renderer singleton's stats
const CRenderer::Stats& Stats;
/// Column descriptions
std::vector columnDescriptions;
enum
{
Row_DrawCalls = 0,
Row_TerrainTris,
Row_WaterTris,
Row_ModelTris,
Row_OverlayTris,
Row_BlendSplats,
Row_Particles,
Row_VBReserved,
Row_VBAllocated,
Row_TextureMemory,
Row_ShadersLoaded,
// Must be last to count number of rows
NumberRows
};
};
// Construction
// Binds the table to the given stats and declares its two columns:
// "Name" (width 230) and "Value" (width 100).
CRendererStatsTable::CRendererStatsTable(const CRenderer::Stats& st)
: Stats(st)
{
columnDescriptions.push_back(ProfileColumn("Name", 230));
columnDescriptions.push_back(ProfileColumn("Value", 100));
}
// Implementation of AbstractProfileTable interface
// Returns the short name of this table: "renderer".
CStr CRendererStatsTable::GetName()
{
return "renderer";
}
// Returns the title of this table: "Renderer statistics".
CStr CRendererStatsTable::GetTitle()
{
return "Renderer statistics";
}
// Returns the number of rows, i.e. the NumberRows enumerator that closes the
// row enumeration.
size_t CRendererStatsTable::GetNumberRows()
{
return NumberRows;
}
const std::vector& CRendererStatsTable::GetColumns()
{
return columnDescriptions;
}
// Returns the text of one table cell.
// Column 0 holds the row label; any other column holds the current value of
// that statistic formatted as an unsigned integer (memory rows in kB).
// Unknown rows yield "???".
CStr CRendererStatsTable::GetCellText(size_t row, size_t col)
{
	char buf[256];
	switch (row)
	{
	case Row_DrawCalls:
		if (col == 0)
			return "# draw calls";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_DrawCalls));
		return buf;

	case Row_TerrainTris:
		if (col == 0)
			return "# terrain tris";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_TerrainTris));
		return buf;

	case Row_WaterTris:
		if (col == 0)
			return "# water tris";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_WaterTris));
		return buf;

	case Row_ModelTris:
		if (col == 0)
			return "# model tris";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_ModelTris));
		return buf;

	case Row_OverlayTris:
		if (col == 0)
			return "# overlay tris";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_OverlayTris));
		return buf;

	case Row_BlendSplats:
		if (col == 0)
			return "# blend splats";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_BlendSplats));
		return buf;

	case Row_Particles:
		if (col == 0)
			return "# particles";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(Stats.m_Particles));
		return buf;

	case Row_VBReserved:
		if (col == 0)
			return "VB reserved";
		sprintf_s(buf, sizeof(buf), "%lu kB", static_cast<unsigned long>(g_Renderer.GetVertexBufferManager().GetBytesReserved() / 1024));
		return buf;

	case Row_VBAllocated:
		if (col == 0)
			return "VB allocated";
		sprintf_s(buf, sizeof(buf), "%lu kB", static_cast<unsigned long>(g_Renderer.GetVertexBufferManager().GetBytesAllocated() / 1024));
		return buf;

	case Row_TextureMemory:
		if (col == 0)
			return "textures uploaded";
		// Divide before narrowing (like the VB rows above), so that a byte
		// count above 4 GiB is not truncated where unsigned long is 32-bit.
		sprintf_s(buf, sizeof(buf), "%lu kB", static_cast<unsigned long>(g_Renderer.GetTextureManager().GetBytesUploaded() / 1024));
		return buf;

	case Row_ShadersLoaded:
		if (col == 0)
			return "shader effects loaded";
		sprintf_s(buf, sizeof(buf), "%lu", static_cast<unsigned long>(g_Renderer.GetShaderManager().GetNumEffectsLoaded()));
		return buf;

	default:
		return "???";
	}
}
// This table has no child tables, so a null pointer is returned for every row.
AbstractProfileTable* CRendererStatsTable::GetChild(size_t UNUSED(row))
{
	return nullptr;
}
} // anonymous namespace
///////////////////////////////////////////////////////////////////////////////////
// CRenderer implementation
/**
 * Class CRenderer::Internals: Truly hide data that is supposed to be hidden
 * in this structure so it won't even appear in header files.
 */
class CRenderer::Internals
{
	NONCOPYABLE(Internals);
public:
	/// Backend device; not owned by the renderer.
	Renderer::Backend::IDevice* device;

	/// Command context created from the device in the constructor.
	std::unique_ptr<Renderer::Backend::IDeviceCommandContext> deviceCommandContext;

	/// true if CRenderer::Open has been called
	bool IsOpen;

	/// true if shaders need to be reloaded
	bool ShadersDirty;

	/// Table to display renderer stats in-game via profile system
	CRendererStatsTable profileTable;

	/// Shader manager
	CShaderManager shaderManager;

	/// Texture manager
	CTextureManager textureManager;

	CVertexBufferManager vertexBufferManager;

	/// Time manager
	CTimeManager timeManager;

	/// Postprocessing effect manager
	CPostprocManager postprocManager;

	CSceneRenderer sceneRenderer;

	CDebugRenderer debugRenderer;

	CFontManager fontManager;

	/// Hash functor for the attribute lists used as keys of vertexInputLayouts.
	struct VertexAttributesHash
	{
		size_t operator()(const std::vector<Renderer::Backend::SVertexAttributeFormat>& attributes) const;
	};

	/// Cache of vertex input layouts, keyed by their attribute list
	/// (filled by CRenderer::GetVertexInputLayout).
	std::unordered_map<
		std::vector<Renderer::Backend::SVertexAttributeFormat>,
		std::unique_ptr<Renderer::Backend::IVertexInputLayout>, VertexAttributesHash> vertexInputLayouts;

	// The initializers are listed in member declaration order.
	Internals(Renderer::Backend::IDevice* device) :
		device(device),
		deviceCommandContext(device->CreateCommandContext()),
		IsOpen(false), ShadersDirty(true), profileTable(g_Renderer.m_Stats),
		shaderManager(device), textureManager(g_VFS, false, device), vertexBufferManager(device),
		postprocManager(device), sceneRenderer(device)
	{
	}
};
size_t CRenderer::Internals::VertexAttributesHash::operator()(
const std::vector& attributes) const
{
size_t seed = 0;
hash_combine(seed, attributes.size());
for (const Renderer::Backend::SVertexAttributeFormat& attribute : attributes)
{
hash_combine(seed, attribute.stream);
hash_combine(seed, attribute.format);
hash_combine(seed, attribute.offset);
hash_combine(seed, attribute.stride);
hash_combine(seed, attribute.rate);
hash_combine(seed, attribute.bindingSlot);
}
return seed;
}
// Creates the renderer internals for the given device, registers the stats
// table with the profile viewer, creates the terrain texture manager, opens
// the renderer at the current resolution (g_xres x g_yres) and finally
// performs the lighting, model and color setup below.
CRenderer::CRenderer(Renderer::Backend::IDevice* device)
{
	TIMER(L"InitRenderer");

	m = std::make_unique<Internals>(device);

	g_ProfileViewer.AddRootTable(&m->profileTable);

	m_Width = 0;
	m_Height = 0;

	m_Stats.Reset();

	// Create terrain related stuff.
	// The object is released again in the destructor via "delete &g_TexMan".
	new CTerrainTextureManager(device);

	Open(g_xres, g_yres);

	// Setup lighting environment. Since the Renderer accesses the
	// lighting environment through a pointer, this has to be done before
	// the first Frame.
	GetSceneRenderer().SetLightEnv(&g_LightEnv);

	ModelDefActivateFastImpl();
	ColorActivateFastImpl();
	ModelRenderer::Init();
}
// Deletes the object behind g_TexMan (presumably the CTerrainTextureManager
// created in the constructor - confirm) and then destroys the internals.
CRenderer::~CRenderer()
{
delete &g_TexMan;
// We no longer UnloadWaterTextures here -
// that is the responsibility of the module that asked for
// them to be loaded (i.e. CGameView).
m.reset();
}
// Reloads the scene renderer's shaders and clears the dirty flag.
// Must only be called after Open() (enforced with ENSURE).
void CRenderer::ReloadShaders()
{
ENSURE(m->IsOpen);
m->sceneRenderer.ReloadShaders(m->device);
m->ShadersDirty = false;
}
// Marks the renderer as open, stores the given size, validates the configured
// render path and initializes the debug renderer, the post-processing manager
// (only if it is enabled) and the scene renderer.
// Always returns true.
bool CRenderer::Open(int width, int height)
{
// Has to be set first: SetRenderPath() does nothing until the renderer is open.
m->IsOpen = true;
// Dimensions
m_Width = width;
m_Height = height;
// Validate the currently selected render path
SetRenderPath(g_RenderingOptions.GetRenderPath());
m->debugRenderer.Initialize();
if (m->postprocManager.IsEnabled())
m->postprocManager.Initialize();
m->sceneRenderer.Initialize();
return true;
}
// Stores the new size and notifies the post-processing manager and the scene
// renderer. Only the scene renderer receives the size as arguments.
void CRenderer::Resize(int width, int height)
{
m_Width = width;
m_Height = height;
m->postprocManager.Resize();
m->sceneRenderer.Resize(width, height);
}
// Validates the requested render path against the device and stores the
// result in the rendering options, marking all shaders dirty.
// DEFAULT resolves to SHADER when shaders are supported and to FIXED
// otherwise; an explicit SHADER request without shader support falls back to
// FIXED with a warning. The call is ignored until the renderer has been opened.
void CRenderer::SetRenderPath(RenderPath rp)
{
	// Delay until Open() is called.
	if (!m->IsOpen)
		return;

	// Renderer has been opened, so validate the selected renderpath
	const bool hasShadersSupport =
		m->device->GetCapabilities().ARBShaders ||
		m->device->GetBackend() != Renderer::Backend::Backend::GL_ARB;

	if (rp == RenderPath::DEFAULT)
		rp = hasShadersSupport ? RenderPath::SHADER : RenderPath::FIXED;

	if (rp == RenderPath::SHADER && !hasShadersSupport)
	{
		LOGWARNING("Falling back to fixed function\n");
		rp = RenderPath::FIXED;
	}

	// TODO: remove this once capabilities have been properly extracted and the above checks have been moved elsewhere.
	g_RenderingOptions.m_RenderPath = rp;

	MakeShadersDirty();
}
// Returns whether a frame should be rendered: never while the application is
// minimized, and otherwise only when it has focus or is not in fullscreen.
bool CRenderer::ShouldRender() const
{
	if (g_app_minimized)
		return false;
	return g_app_has_focus || !g_VideoMode.IsInFullscreen();
}
// Renders one frame, or takes a pending screenshot instead.
// With needsPresent set, a backbuffer is acquired first and presented
// afterwards. If resource preloading was requested, one extra frame without
// the logger is rendered before the regular one.
void CRenderer::RenderFrame(const bool needsPresent)
{
	// Do not render if not focused while in fullscreen or minimised,
	// as that triggers a difficult-to-reproduce crash on some graphic cards.
	if (!ShouldRender())
		return;

	// A pending screenshot request replaces the regular frame.
	if (m_ScreenShotType == ScreenShotType::BIG)
	{
		RenderBigScreenShot(needsPresent);
		return;
	}
	if (m_ScreenShotType == ScreenShotType::DEFAULT)
	{
		RenderScreenShot(needsPresent);
		return;
	}

	// In case of no acquired backbuffer we have nothing to render to.
	if (needsPresent && !m->device->AcquireNextBackbuffer())
		return;

	if (m_ShouldPreloadResourcesBeforeNextFrame)
	{
		m_ShouldPreloadResourcesBeforeNextFrame = false;
		// We don't need to render logger for the preload.
		RenderFrameImpl(true, false);
	}

	RenderFrameImpl(true, true);

	m->deviceCommandContext->Flush();
	if (needsPresent)
		m->device->Present();
}
// Records all rendering commands of one frame.
// With a started game, the scene is rendered either into the framebuffer
// provided by the post-processing manager (which is post-processed and then
// blitted to the backbuffer) or directly into the backbuffer; overlays and
// the cinema layer follow. Without a started game only a backbuffer pass is
// begun. In both cases the 2D layers are drawn last, into the backbuffer.
//   renderGUI    - forwarded to RenderFrame2D().
//   renderLogger - forwarded to RenderFrame2D().
void CRenderer::RenderFrameImpl(const bool renderGUI, const bool renderLogger)
{
PROFILE3("render");
g_Profiler2.RecordGPUFrameStart();
g_TexMan.UploadResourcesIfNeeded(m->deviceCommandContext.get());
m->textureManager.MakeUploadProgress(m->deviceCommandContext.get());
// prepare before starting the renderer frame
if (g_Game && g_Game->IsGameStarted())
g_Game->GetView()->BeginFrame();
if (g_Game)
m->sceneRenderer.SetSimulation(g_Game->GetSimulation2());
// start new frame
BeginFrame();
if (g_Game && g_Game->IsGameStarted())
{
g_Game->GetView()->Prepare(m->deviceCommandContext.get());
Renderer::Backend::IFramebuffer* framebuffer = nullptr;
+ Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
CPostprocManager& postprocManager = GetPostprocManager();
if (postprocManager.IsEnabled())
{
// We have to update the post process manager with real near/far planes
// that we use for the scene rendering.
postprocManager.SetDepthBufferClipPlanes(
m->sceneRenderer.GetViewCamera().GetNearPlane(),
m->sceneRenderer.GetViewCamera().GetFarPlane()
);
postprocManager.Initialize();
framebuffer = postprocManager.PrepareAndGetOutputFramebuffer();
+ viewportRect.width = framebuffer->GetWidth();
+ viewportRect.height = framebuffer->GetHeight();
}
else
{
// We don't need to clear the color attachment of the framebuffer as the sky
// is going to be rendered anyway.
framebuffer =
m->deviceCommandContext->GetDevice()->GetCurrentBackbuffer(
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::STORE,
Renderer::Backend::AttachmentLoadOp::CLEAR,
Renderer::Backend::AttachmentStoreOp::DONT_CARE);
+
+ viewportRect.width = m_Width;
+ viewportRect.height = m_Height;
}
m->deviceCommandContext->BeginFramebufferPass(framebuffer);
-
- Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
- viewportRect.width = m_Width;
- viewportRect.height = m_Height;
m->deviceCommandContext->SetViewports(1, &viewportRect);
g_Game->GetView()->Render(m->deviceCommandContext.get());
if (postprocManager.IsEnabled())
{
// The scene went into the post-processing framebuffer: finish that pass,
// post-process it and copy the result to the backbuffer.
m->deviceCommandContext->EndFramebufferPass();
if (postprocManager.IsMultisampleEnabled())
postprocManager.ResolveMultisampleFramebuffer(m->deviceCommandContext.get());
postprocManager.ApplyPostproc(m->deviceCommandContext.get());
Renderer::Backend::IFramebuffer* backbuffer =
m->deviceCommandContext->GetDevice()->GetCurrentBackbuffer(
Renderer::Backend::AttachmentLoadOp::LOAD,
Renderer::Backend::AttachmentStoreOp::STORE,
Renderer::Backend::AttachmentLoadOp::LOAD,
Renderer::Backend::AttachmentStoreOp::DONT_CARE);
postprocManager.BlitOutputFramebuffer(
m->deviceCommandContext.get(), backbuffer);
m->deviceCommandContext->BeginFramebufferPass(backbuffer);
// NOTE(review): this declaration shadows the viewportRect declared at the
// top of the enclosing block; here the viewport is the renderer size again.
Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
viewportRect.width = m_Width;
viewportRect.height = m_Height;
m->deviceCommandContext->SetViewports(1, &viewportRect);
}
g_Game->GetView()->RenderOverlays(m->deviceCommandContext.get());
g_Game->GetView()->GetCinema()->Render();
}
else
{
// We have a fullscreen background in our UI so we don't need
// to clear the color attachment.
// We don't need a depth test to render so we don't care about the
// depth-stencil attachment content.
// In case of Atlas we don't have g_Game, so we still need to clear depth.
const Renderer::Backend::AttachmentLoadOp depthStencilLoadOp =
g_AtlasGameLoop && g_AtlasGameLoop->view
? Renderer::Backend::AttachmentLoadOp::CLEAR
: Renderer::Backend::AttachmentLoadOp::DONT_CARE;
Renderer::Backend::IFramebuffer* backbuffer =
m->deviceCommandContext->GetDevice()->GetCurrentBackbuffer(
Renderer::Backend::AttachmentLoadOp::DONT_CARE,
Renderer::Backend::AttachmentStoreOp::STORE,
depthStencilLoadOp,
Renderer::Backend::AttachmentStoreOp::DONT_CARE);
m->deviceCommandContext->BeginFramebufferPass(backbuffer);
Renderer::Backend::IDeviceCommandContext::Rect viewportRect{};
viewportRect.width = m_Width;
viewportRect.height = m_Height;
m->deviceCommandContext->SetViewports(1, &viewportRect);
}
// If we're in Atlas game view, render special tools
if (g_AtlasGameLoop && g_AtlasGameLoop->view)
{
g_AtlasGameLoop->view->DrawCinemaPathTool();
}
RenderFrame2D(renderGUI, renderLogger);
// Ends the backbuffer pass that was begun in one of the branches above.
m->deviceCommandContext->EndFramebufferPass();
EndFrame();
// Attach the per-frame statistics to the profiler region.
const Stats& stats = GetStats();
PROFILE2_ATTR("draw calls: %zu", stats.m_DrawCalls);
PROFILE2_ATTR("terrain tris: %zu", stats.m_TerrainTris);
PROFILE2_ATTR("water tris: %zu", stats.m_WaterTris);
PROFILE2_ATTR("model tris: %zu", stats.m_ModelTris);
PROFILE2_ATTR("overlay tris: %zu", stats.m_OverlayTris);
PROFILE2_ATTR("blend splats: %zu", stats.m_BlendSplats);
PROFILE2_ATTR("particles: %zu", stats.m_Particles);
g_Profiler2.RecordGPUFrameEnd();
}
// Draws the 2D layers onto one canvas, in this order: scene text overlays,
// GUI (only if renderGUI), Atlas overlays (only in the Atlas game view),
// console, logger (only if renderLogger) and profiler.
void CRenderer::RenderFrame2D(const bool renderGUI, const bool renderLogger)
{
// The canvas is created with the window resolution and the video mode scale.
CCanvas2D canvas(g_xres, g_yres, g_VideoMode.GetScale(), m->deviceCommandContext.get());
m->sceneRenderer.RenderTextOverlays(canvas);
if (renderGUI)
{
GPU_SCOPED_LABEL(m->deviceCommandContext.get(), "Render GUI");
// All GUI elements are drawn in Z order to render semi-transparent
// objects correctly.
g_GUI->Draw(canvas);
}
// If we're in Atlas game view, render special overlays (e.g. editor bandbox).
if (g_AtlasGameLoop && g_AtlasGameLoop->view)
{
g_AtlasGameLoop->view->DrawOverlays(canvas);
}
{
GPU_SCOPED_LABEL(m->deviceCommandContext.get(), "Render console");
g_Console->Render(canvas);
}
if (renderLogger)
{
GPU_SCOPED_LABEL(m->deviceCommandContext.get(), "Render logger");
g_Logger->Render(canvas);
}
{
GPU_SCOPED_LABEL(m->deviceCommandContext.get(), "Render profiler");
// Profile information
g_ProfileViewer.RenderProfile(canvas);
}
}
void CRenderer::RenderScreenShot(const bool needsPresent)
{
m_ScreenShotType = ScreenShotType::NONE;
// get next available numbered filename
// note: %04d -> always 4 digits, so sorting by filename works correctly.
const VfsPath filenameFormat(L"screenshots/screenshot%04d.png");
VfsPath filename;
vfs::NextNumberedFilename(g_VFS, filenameFormat, g_NextScreenShotNumber, filename);
const size_t width = static_cast(g_xres), height = static_cast(g_yres);
const size_t bpp = 24;
if (needsPresent && !m->device->AcquireNextBackbuffer())
return;
// Hide log messages and re-render
RenderFrameImpl(true, false);
const size_t img_size = width * height * bpp / 8;
const size_t hdr_size = tex_hdr_size(filename);
std::shared_ptr buf;
AllocateAligned(buf, hdr_size + img_size, maxSectorSize);
void* img = buf.get() + hdr_size;
Tex t;
if (t.wrap(width, height, bpp, TEX_BOTTOM_UP, buf, hdr_size) < 0)
return;
m->deviceCommandContext->ReadbackFramebufferSync(0, 0, width, height, img);
m->deviceCommandContext->Flush();
if (needsPresent)
m->device->Present();
if (tex_write(&t, filename) == INFO::OK)
{
OsPath realPath;
g_VFS->GetRealPath(filename, realPath);
LOGMESSAGERENDER(g_L10n.Translate("Screenshot written to '%s'"), realPath.string8());
debug_printf(
CStr(g_L10n.Translate("Screenshot written to '%s'") + "\n").c_str(),
realPath.string8().c_str());
}
else
LOGERROR("Error writing screenshot to '%s'", filename.string8());
}
void CRenderer::RenderBigScreenShot(const bool needsPresent)
{
m_ScreenShotType = ScreenShotType::NONE;
// If the game hasn't started yet then use WriteScreenshot to generate the image.
if (!g_Game)
return RenderScreenShot(needsPresent);
int tiles = 4, tileWidth = 256, tileHeight = 256;
CFG_GET_VAL("screenshot.tiles", tiles);
CFG_GET_VAL("screenshot.tilewidth", tileWidth);
CFG_GET_VAL("screenshot.tileheight", tileHeight);
if (tiles <= 0 || tileWidth <= 0 || tileHeight <= 0 || tileWidth * tiles % 4 != 0 || tileHeight * tiles % 4 != 0)
{
LOGWARNING("Invalid big screenshot size: tiles=%d tileWidth=%d tileHeight=%d", tiles, tileWidth, tileHeight);
return;
}
if (g_xres < tileWidth && g_yres < tileHeight)
{
LOGWARNING(
"The window size is too small for a big screenshot, increase the"
" window size %dx%d or decrease the tile size %dx%d",
g_xres, g_yres, tileWidth, tileHeight);
return;
}
// get next available numbered filename
// note: %04d -> always 4 digits, so sorting by filename works correctly.
const VfsPath filenameFormat(L"screenshots/screenshot%04d.bmp");
VfsPath filename;
vfs::NextNumberedFilename(g_VFS, filenameFormat, g_NextScreenShotNumber, filename);
const int imageWidth = tileWidth * tiles, imageHeight = tileHeight * tiles;
const int bpp = 24;
const size_t imageSize = imageWidth * imageHeight * bpp / 8;
const size_t tileSize = tileWidth * tileHeight * bpp / 8;
const size_t headerSize = tex_hdr_size(filename);
void* tileData = malloc(tileSize);
if (!tileData)
{
WARN_IF_ERR(ERR::NO_MEM);
return;
}
std::shared_ptr imageBuffer;
AllocateAligned(imageBuffer, headerSize + imageSize, maxSectorSize);
Tex t;
void* img = imageBuffer.get() + headerSize;
if (t.wrap(imageWidth, imageHeight, bpp, TEX_BOTTOM_UP, imageBuffer, headerSize) < 0)
{
free(tileData);
return;
}
CCamera oldCamera = *g_Game->GetView()->GetCamera();
// Resize various things so that the sizes and aspect ratios are correct
{
g_Renderer.Resize(tileWidth, tileHeight);
SViewPort vp = { 0, 0, tileWidth, tileHeight };
g_Game->GetView()->SetViewport(vp);
}
// Render each tile
CMatrix3D projection;
projection.SetIdentity();
const float aspectRatio = 1.0f * tileWidth / tileHeight;
for (int tileY = 0; tileY < tiles; ++tileY)
{
for (int tileX = 0; tileX < tiles; ++tileX)
{
// Adjust the camera to render the appropriate region
if (oldCamera.GetProjectionType() == CCamera::ProjectionType::PERSPECTIVE)
{
projection.SetPerspectiveTile(
oldCamera.GetFOV(), aspectRatio, oldCamera.GetNearPlane(), oldCamera.GetFarPlane(),
tiles, tileX, tileY);
}
g_Game->GetView()->GetCamera()->SetProjection(projection);
if (!needsPresent || m->device->AcquireNextBackbuffer())
{
RenderFrameImpl(false, false);
m->deviceCommandContext->ReadbackFramebufferSync(0, 0, tileWidth, tileHeight, tileData);
m->deviceCommandContext->Flush();
if (needsPresent)
m->device->Present();
}
// Copy the tile pixels into the main image
for (int y = 0; y < tileHeight; ++y)
{
void* dest = static_cast(img) + ((tileY * tileHeight + y) * imageWidth + (tileX * tileWidth)) * bpp / 8;
void* src = static_cast(tileData) + y * tileWidth * bpp / 8;
memcpy(dest, src, tileWidth * bpp / 8);
}
}
}
// Restore the viewport settings
{
g_Renderer.Resize(g_xres, g_yres);
SViewPort vp = { 0, 0, g_xres, g_yres };
g_Game->GetView()->SetViewport(vp);
g_Game->GetView()->GetCamera()->SetProjectionFromCamera(oldCamera);
}
if (tex_write(&t, filename) == INFO::OK)
{
OsPath realPath;
g_VFS->GetRealPath(filename, realPath);
LOGMESSAGERENDER(g_L10n.Translate("Screenshot written to '%s'"), realPath.string8());
debug_printf(
CStr(g_L10n.Translate("Screenshot written to '%s'") + "\n").c_str(),
realPath.string8().c_str());
}
else
LOGERROR("Error writing screenshot to '%s'", filename.string8());
free(tileData);
}
// Starts a new frame: resets the per-frame statistics, reloads the shaders if
// they were marked dirty and notifies the scene renderer.
void CRenderer::BeginFrame()
{
PROFILE("begin frame");
// Zero out all the per-frame stats.
m_Stats.Reset();
if (m->ShadersDirty)
ReloadShaders();
m->sceneRenderer.BeginFrame();
}
// Ends the current frame by notifying the scene renderer.
void CRenderer::EndFrame()
{
PROFILE3("end frame");
m->sceneRenderer.EndFrame();
}
/**
 * Flags the shaders as dirty and propagates the request to the scene
 * renderer. The flag is checked in BeginFrame(), which then calls
 * ReloadShaders().
 */
void CRenderer::MakeShadersDirty()
{
m->ShadersDirty = true;
m->sceneRenderer.MakeShadersDirty();
}
/**
 * Returns a reference to the texture manager stored in the renderer's
 * internal state (m->textureManager).
 */
CTextureManager& CRenderer::GetTextureManager()
{
return m->textureManager;
}
/**
 * Returns a reference to the vertex buffer manager stored in the renderer's
 * internal state (m->vertexBufferManager).
 */
CVertexBufferManager& CRenderer::GetVertexBufferManager()
{
return m->vertexBufferManager;
}
/**
 * Returns a reference to the shader manager stored in the renderer's
 * internal state (m->shaderManager).
 */
CShaderManager& CRenderer::GetShaderManager()
{
return m->shaderManager;
}
/**
 * Returns a reference to the time manager stored in the renderer's
 * internal state (m->timeManager).
 */
CTimeManager& CRenderer::GetTimeManager()
{
return m->timeManager;
}
/**
 * Returns a reference to the post-processing manager stored in the
 * renderer's internal state (m->postprocManager).
 */
CPostprocManager& CRenderer::GetPostprocManager()
{
return m->postprocManager;
}
/**
 * Returns a reference to the scene renderer stored in the renderer's
 * internal state (m->sceneRenderer).
 */
CSceneRenderer& CRenderer::GetSceneRenderer()
{
return m->sceneRenderer;
}
/**
 * Returns a reference to the debug renderer stored in the renderer's
 * internal state (m->debugRenderer).
 */
CDebugRenderer& CRenderer::GetDebugRenderer()
{
return m->debugRenderer;
}
/**
 * Returns a reference to the font manager stored in the renderer's
 * internal state (m->fontManager).
 */
CFontManager& CRenderer::GetFontManager()
{
return m->fontManager;
}
/**
 * Raises the m_ShouldPreloadResourcesBeforeNextFrame flag. Nothing is
 * preloaded here; the flag is presumably consumed by the frame rendering
 * code - confirm against the caller.
 */
void CRenderer::PreloadResourcesBeforeNextFrame()
{
m_ShouldPreloadResourcesBeforeNextFrame = true;
}
/**
 * Records the requested screenshot type in m_ScreenShotType. No screenshot
 * is taken here; the stored value is presumably processed when the next
 * frame is rendered - confirm against the frame rendering code.
 *
 * @param screenShotType kind of screenshot to remember.
 */
void CRenderer::MakeScreenShotOnNextFrame(ScreenShotType screenShotType)
{
m_ScreenShotType = screenShotType;
}
/**
 * Returns a non-owning pointer to the device command context held in the
 * renderer's internal state (obtained via get() on
 * m->deviceCommandContext).
 */
Renderer::Backend::IDeviceCommandContext* CRenderer::GetDeviceCommandContext()
{
return m->deviceCommandContext.get();
}
/**
 * Returns the vertex input layout matching the given attributes. Layouts
 * are cached in m->vertexInputLayouts keyed by a copy of the attribute
 * list, so the device is asked to create a layout only the first time a
 * particular attribute set is requested.
 *
 * @param attributes attribute formats describing the layout.
 * @return non-owning pointer to the cached layout; it is whatever the
 *         device's CreateVertexInputLayout produced for that set.
 */
Renderer::Backend::IVertexInputLayout* CRenderer::GetVertexInputLayout(
	const PS::span<const Renderer::Backend::SVertexAttributeFormat> attributes)
{
	// The element type is spelled out explicitly: a bare std::vector{begin, end}
	// would be deduced as a vector of iterators instead of a copy of the range.
	const auto [it, inserted] = m->vertexInputLayouts.emplace(
		std::vector<Renderer::Backend::SVertexAttributeFormat>{attributes.begin(), attributes.end()}, nullptr);
	// A fresh entry holds only a null placeholder until the device fills it.
	if (inserted)
		it->second = m->device->CreateVertexInputLayout(attributes);
	return it->second.get();
}
Index: ps/trunk/source/renderer/backend/IDeviceCommandContext.h
===================================================================
--- ps/trunk/source/renderer/backend/IDeviceCommandContext.h (revision 28009)
+++ ps/trunk/source/renderer/backend/IDeviceCommandContext.h (revision 28010)
@@ -1,214 +1,247 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#ifndef INCLUDED_RENDERER_BACKEND_IDEVICECOMMANDCONTEXT
#define INCLUDED_RENDERER_BACKEND_IDEVICECOMMANDCONTEXT
#include "ps/containers/Span.h"
#include "renderer/backend/Format.h"
#include "renderer/backend/IDeviceObject.h"
#include "renderer/backend/PipelineState.h"
#include "renderer/backend/Sampler.h"
#include
#include
namespace Renderer
{
namespace Backend
{
class IBuffer;
class IDevice;
class IFramebuffer;
class ITexture;
class IDeviceCommandContext : public IDeviceObject
{
public:
/**
* Binds the graphics pipeline state. It should be called only inside a
* framebuffer pass and as rarely as possible.
*/
virtual void SetGraphicsPipelineState(IGraphicsPipelineState* pipelineState) = 0;
+ /**
+ * Binds the graphics pipeline state. It should be called only inside a
+ * framebuffer pass and as rarely as possible.
+ */
+ virtual void SetComputePipelineState(IComputePipelineState* pipelineState) = 0;
+
// TODO: maybe we should add a more common type, like CRectI.
struct Rect
{
int32_t x, y;
int32_t width, height;
};
/**
* Copies source region into destination region automatically applying
* compatible format conversion and scaling using a provided filter.
* A backbuffer can't be a source.
*/
virtual void BlitFramebuffer(
IFramebuffer* sourceFramebuffer, IFramebuffer* destinationFramebuffer,
const Rect& sourceRegion, const Rect& destinationRegion,
const Sampler::Filter filter) = 0;
/**
* Resolves multisample source framebuffer attachments to destination
* attachments. Source attachments should have a sample count > 1 and
* destination attachments should have a sample count = 1.
* A backbuffer can't be a source.
*/
virtual void ResolveFramebuffer(
IFramebuffer* sourceFramebuffer, IFramebuffer* destinationFramebuffer) = 0;
/**
* Starts a framebuffer pass, performs attachment load operations.
* It should be called as rarely as possible.
*
* @see IFramebuffer
*/
virtual void BeginFramebufferPass(IFramebuffer* framebuffer) = 0;
/**
* Finishes a framebuffer pass, performs attachment store operations.
*/
virtual void EndFramebufferPass() = 0;
/**
* Clears all mentioned attachments. Prefer to use attachment load operations over
* this function. It should be called only inside a framebuffer pass.
*/
virtual void ClearFramebuffer(const bool color, const bool depth, const bool stencil) = 0;
/**
* Readbacks the current backbuffer to data in R8G8B8_UNORM format somewhen
* between the function call and Flush (inclusively). Because of that the
* data pointer should be valid in that time period and have enough space
* to fit the readback result.
* @note this operation is very slow and should not be used regularly.
* TODO: ideally we should do readback on Present or even asynchronously
* but a client doesn't support that yet.
*/
virtual void ReadbackFramebufferSync(
const uint32_t x, const uint32_t y, const uint32_t width, const uint32_t height,
void* data) = 0;
virtual void UploadTexture(ITexture* texture, const Format dataFormat,
const void* data, const size_t dataSize,
const uint32_t level = 0, const uint32_t layer = 0) = 0;
virtual void UploadTextureRegion(ITexture* texture, const Format dataFormat,
const void* data, const size_t dataSize,
const uint32_t xOffset, const uint32_t yOffset,
const uint32_t width, const uint32_t height,
const uint32_t level = 0, const uint32_t layer = 0) = 0;
using UploadBufferFunction = std::function;
virtual void UploadBuffer(IBuffer* buffer, const void* data, const uint32_t dataSize) = 0;
virtual void UploadBuffer(IBuffer* buffer, const UploadBufferFunction& uploadFunction) = 0;
virtual void UploadBufferRegion(
IBuffer* buffer, const void* data, const uint32_t dataOffset, const uint32_t dataSize) = 0;
virtual void UploadBufferRegion(
IBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
const UploadBufferFunction& uploadFunction) = 0;
virtual void SetScissors(const uint32_t scissorCount, const Rect* scissors) = 0;
virtual void SetViewports(const uint32_t viewportCount, const Rect* viewports) = 0;
/**
* Binds the vertex input layout. It should be compatible with the shader
* program's one. It should be called only inside a framebuffer pass and as
* rarely as possible.
*/
virtual void SetVertexInputLayout(
IVertexInputLayout* vertexInputLayout) = 0;
virtual void SetVertexBuffer(
const uint32_t bindingSlot, IBuffer* buffer, const uint32_t offset) = 0;
virtual void SetVertexBufferData(
const uint32_t bindingSlot, const void* data, const uint32_t dataSize) = 0;
virtual void SetIndexBuffer(IBuffer* buffer) = 0;
virtual void SetIndexBufferData(const void* data, const uint32_t dataSize) = 0;
virtual void BeginPass() = 0;
virtual void EndPass() = 0;
virtual void Draw(const uint32_t firstVertex, const uint32_t vertexCount) = 0;
virtual void DrawIndexed(
const uint32_t firstIndex, const uint32_t indexCount, const int32_t vertexOffset) = 0;
virtual void DrawInstanced(
const uint32_t firstVertex, const uint32_t vertexCount,
const uint32_t firstInstance, const uint32_t instanceCount) = 0;
virtual void DrawIndexedInstanced(
const uint32_t firstIndex, const uint32_t indexCount,
const uint32_t firstInstance, const uint32_t instanceCount,
const int32_t vertexOffset) = 0;
// TODO: should be removed when performance impact is minimal on slow hardware.
virtual void DrawIndexedInRange(
const uint32_t firstIndex, const uint32_t indexCount,
const uint32_t start, const uint32_t end) = 0;
+ /**
+ * Starts a compute pass, can't be called inside a framebuffer pass.
+ * It should be called as rarely as possible.
+ */
+ virtual void BeginComputePass() = 0;
+
+ /**
+ * Finishes a compute pass.
+ */
+ virtual void EndComputePass() = 0;
+
+ /**
+ * Dispatches groupCountX * groupCountY * groupCountZ compute groups.
+ */
+ virtual void Dispatch(
+ const uint32_t groupCountX,
+ const uint32_t groupCountY,
+ const uint32_t groupCountZ) = 0;
+
+ /**
+ * Sets a read-only texture to the binding slot.
+ */
virtual void SetTexture(const int32_t bindingSlot, ITexture* texture) = 0;
+ /**
+ * Sets a read & write resource to the binding slot.
+ */
+ virtual void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) = 0;
+
virtual void SetUniform(
const int32_t bindingSlot,
const float value) = 0;
virtual void SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY) = 0;
virtual void SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY,
const float valueZ) = 0;
virtual void SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY,
const float valueZ, const float valueW) = 0;
virtual void SetUniform(
const int32_t bindingSlot, PS::span values) = 0;
virtual void BeginScopedLabel(const char* name) = 0;
virtual void EndScopedLabel() = 0;
virtual void Flush() = 0;
};
} // namespace Backend
} // namespace Renderer
#define GPU_SCOPED_LABEL(deviceCommandContext, name) \
GPUScopedLabel scopedLabel((deviceCommandContext), (name));
/**
 * Scope guard for GPU debug labels: the constructor calls BeginScopedLabel
 * on the given command context and the destructor calls EndScopedLabel, so
 * the label covers exactly the lifetime of the object.
 *
 * The command context is not owned and must outlive the guard.
 */
class GPUScopedLabel
{
public:
	/**
	 * @param deviceCommandContext context to open the label on; it is
	 *        dereferenced unconditionally.
	 * @param name label text passed to BeginScopedLabel.
	 */
	GPUScopedLabel(
		Renderer::Backend::IDeviceCommandContext* deviceCommandContext,
		const char* name)
		: m_DeviceCommandContext(deviceCommandContext)
	{
		m_DeviceCommandContext->BeginScopedLabel(name);
	}

	// A copy would call EndScopedLabel a second time for a single
	// BeginScopedLabel, so the guard is neither copyable nor assignable.
	GPUScopedLabel(const GPUScopedLabel&) = delete;
	GPUScopedLabel& operator=(const GPUScopedLabel&) = delete;

	~GPUScopedLabel()
	{
		m_DeviceCommandContext->EndScopedLabel();
	}

private:
	Renderer::Backend::IDeviceCommandContext* m_DeviceCommandContext = nullptr;
};
#endif // INCLUDED_RENDERER_BACKEND_IDEVICECOMMANDCONTEXT
Index: ps/trunk/source/renderer/backend/dummy/Device.cpp
===================================================================
--- ps/trunk/source/renderer/backend/dummy/Device.cpp (revision 28009)
+++ ps/trunk/source/renderer/backend/dummy/Device.cpp (revision 28010)
@@ -1,171 +1,177 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#include "precompiled.h"
#include "Device.h"
#include "renderer/backend/dummy/Buffer.h"
#include "renderer/backend/dummy/DeviceCommandContext.h"
#include "renderer/backend/dummy/Framebuffer.h"
#include "renderer/backend/dummy/PipelineState.h"
#include "renderer/backend/dummy/ShaderProgram.h"
#include "renderer/backend/dummy/Texture.h"
#include "scriptinterface/JSON.h"
#include "scriptinterface/Object.h"
#include "scriptinterface/ScriptInterface.h"
#include "scriptinterface/ScriptRequest.h"
namespace Renderer
{
namespace Backend
{
namespace Dummy
{
/**
 * Sets up the dummy device: fills in placeholder identification strings,
 * creates the backbuffer framebuffer object and fills in a fixed set of
 * capabilities.
 */
CDevice::CDevice()
{
	// Placeholder identification strings.
	m_Name = "Dummy";
	m_Version = "Unknown";
	m_DriverInformation = "Unknown";
	m_Extensions = {};

	m_Backbuffer = CFramebuffer::Create(this);

	auto& capabilities = m_Capabilities;

	// Feature flags.
	capabilities.S3TC = true;
	capabilities.ARBShaders = false;
	capabilities.ARBShadersShadow = false;
	capabilities.computeShaders = true;
	capabilities.debugLabels = true;
	capabilities.debugScopedLabels = true;
	capabilities.multisampling = true;
	capabilities.anisotropicFiltering = true;
	capabilities.instancing = true;

	// Numeric limits.
	capabilities.maxSampleCount = 4u;
	capabilities.maxAnisotropy = 16.0f;
	capabilities.maxTextureSize = 8192u;
}
// Defaulted out of line, in the source file where the member types are
// available through the includes above.
CDevice::~CDevice() = default;
/**
 * Writes the backend description into the given script value. For the dummy
 * backend this is a single "name" property set to "dummy".
 */
void CDevice::Report(const ScriptRequest& rq, JS::HandleValue settings)
{
Script::SetProperty(rq, settings, "name", "dummy");
}
std::unique_ptr CDevice::CreateCommandContext()
{
return CDeviceCommandContext::Create(this);
}
std::unique_ptr CDevice::CreateGraphicsPipelineState(
const SGraphicsPipelineStateDesc& pipelineStateDesc)
{
return CGraphicsPipelineState::Create(this, pipelineStateDesc);
}
+/**
+ * Creates a dummy compute pipeline state for the given description by
+ * delegating to CComputePipelineState::Create.
+ */
+std::unique_ptr<IComputePipelineState> CDevice::CreateComputePipelineState(
+ const SComputePipelineStateDesc& pipelineStateDesc)
+{
+ return CComputePipelineState::Create(this, pipelineStateDesc);
+}
+
std::unique_ptr CDevice::CreateVertexInputLayout(
const PS::span UNUSED(attributes))
{
return nullptr;
}
std::unique_ptr CDevice::CreateTexture(
const char* UNUSED(name), const CTexture::Type type, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& UNUSED(defaultSamplerDesc), const uint32_t MIPLevelCount, const uint32_t UNUSED(sampleCount))
{
return CTexture::Create(this, type, usage, format, width, height, MIPLevelCount);
}
std::unique_ptr CDevice::CreateTexture2D(
const char* name, const uint32_t usage,
const Format format, const uint32_t width, const uint32_t height,
const Sampler::Desc& defaultSamplerDesc, const uint32_t MIPLevelCount, const uint32_t sampleCount)
{
return CreateTexture(name, ITexture::Type::TEXTURE_2D, usage,
format, width, height, defaultSamplerDesc, MIPLevelCount, sampleCount);
}
std::unique_ptr CDevice::CreateFramebuffer(
const char*, SColorAttachment*, SDepthStencilAttachment*)
{
return CFramebuffer::Create(this);
}
std::unique_ptr CDevice::CreateBuffer(
const char*, const CBuffer::Type type, const uint32_t size, const bool dynamic)
{
return CBuffer::Create(this, type, size, dynamic);
}
std::unique_ptr CDevice::CreateShaderProgram(
const CStr&, const CShaderDefines&)
{
return CShaderProgram::Create(this);
}
/**
 * Always reports success: the dummy backend has no backbuffer to acquire.
 */
bool CDevice::AcquireNextBackbuffer()
{
// We have nothing to acquire.
return true;
}
/**
 * Returns a non-owning pointer to the backbuffer framebuffer created in the
 * constructor. The load/store operation arguments are ignored.
 */
IFramebuffer* CDevice::GetCurrentBackbuffer(
const AttachmentLoadOp, const AttachmentStoreOp,
const AttachmentLoadOp, const AttachmentStoreOp)
{
return m_Backbuffer.get();
}
/**
 * Intentionally a no-op in the dummy backend.
 */
void CDevice::Present()
{
// We have nothing to present.
}
/**
 * Intentionally a no-op: the dummy backend ignores the new window size.
 */
void CDevice::OnWindowResize(const uint32_t UNUSED(width), const uint32_t UNUSED(height))
{
}
/**
 * The dummy backend reports every texture format as supported; the format
 * argument is ignored.
 */
bool CDevice::IsTextureFormatSupported(const Format UNUSED(format)) const
{
return true;
}
/**
 * The dummy backend reports every framebuffer format as supported; the
 * format argument is ignored.
 */
bool CDevice::IsFramebufferFormatSupported(const Format UNUSED(format)) const
{
return true;
}
/**
 * Always returns Format::D24_UNORM_S8_UINT; all three arguments are ignored.
 */
Format CDevice::GetPreferredDepthStencilFormat(
const uint32_t, const bool, const bool) const
{
return Format::D24_UNORM_S8_UINT;
}
std::unique_ptr CreateDevice(SDL_Window* UNUSED(window))
{
return std::make_unique();
}
} // namespace Dummy
} // namespace Backend
} // namespace Renderer
Index: ps/trunk/source/renderer/backend/dummy/DeviceCommandContext.h
===================================================================
--- ps/trunk/source/renderer/backend/dummy/DeviceCommandContext.h (revision 28009)
+++ ps/trunk/source/renderer/backend/dummy/DeviceCommandContext.h (revision 28010)
@@ -1,154 +1,165 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#ifndef INCLUDED_RENDERER_BACKEND_DUMMY_DEVICECOMMANDCONTEXT
#define INCLUDED_RENDERER_BACKEND_DUMMY_DEVICECOMMANDCONTEXT
#include "renderer/backend/Format.h"
#include "renderer/backend/IDeviceCommandContext.h"
#include "renderer/backend/PipelineState.h"
#include
namespace Renderer
{
namespace Backend
{
namespace Dummy
{
class CDevice;
class CBuffer;
class CFramebuffer;
class CShaderProgram;
class CTexture;
/**
 * Dummy backend implementation of IDeviceCommandContext. It overrides the
 * interface's methods and keeps a non-owning pointer to the CDevice it
 * belongs to. Instances can't be constructed directly: the constructor and
 * the Create factory are private and accessible to the friend class CDevice
 * only.
 */
class CDeviceCommandContext : public IDeviceCommandContext
{
public:
~CDeviceCommandContext();
IDevice* GetDevice() override;
void SetGraphicsPipelineState(IGraphicsPipelineState* pipelineState) override;
+ void SetComputePipelineState(IComputePipelineState* pipelineState) override;
void BlitFramebuffer(
IFramebuffer* sourceFramebuffer, IFramebuffer* destinationFramebuffer,
const Rect& sourceRegion, const Rect& destinationRegion,
const Sampler::Filter filter) override;
void ResolveFramebuffer(
IFramebuffer* sourceFramebuffer, IFramebuffer* destinationFramebuffer) override;
void ClearFramebuffer(const bool color, const bool depth, const bool stencil) override;
void BeginFramebufferPass(IFramebuffer* framebuffer) override;
void EndFramebufferPass() override;
void ReadbackFramebufferSync(
const uint32_t x, const uint32_t y, const uint32_t width, const uint32_t height,
void* data) override;
void UploadTexture(ITexture* texture, const Format dataFormat,
const void* data, const size_t dataSize,
const uint32_t level = 0, const uint32_t layer = 0) override;
void UploadTextureRegion(ITexture* texture, const Format dataFormat,
const void* data, const size_t dataSize,
const uint32_t xOffset, const uint32_t yOffset,
const uint32_t width, const uint32_t height,
const uint32_t level = 0, const uint32_t layer = 0) override;
// NOTE(review): the template argument list of std::function appears to be
// missing on the next line - confirm against the repository. The alias
// repeats the one declared in IDeviceCommandContext.
using UploadBufferFunction = std::function;
void UploadBuffer(IBuffer* buffer, const void* data, const uint32_t dataSize) override;
void UploadBuffer(IBuffer* buffer, const UploadBufferFunction& uploadFunction) override;
void UploadBufferRegion(
IBuffer* buffer, const void* data, const uint32_t dataOffset, const uint32_t dataSize) override;
void UploadBufferRegion(
IBuffer* buffer, const uint32_t dataOffset, const uint32_t dataSize,
const UploadBufferFunction& uploadFunction) override;
void SetScissors(const uint32_t scissorCount, const Rect* scissors) override;
void SetViewports(const uint32_t viewportCount, const Rect* viewports) override;
void SetVertexInputLayout(
IVertexInputLayout* vertexInputLayout) override;
void SetVertexBuffer(
const uint32_t bindingSlot, IBuffer* buffer, const uint32_t offset) override;
void SetVertexBufferData(
const uint32_t bindingSlot, const void* data, const uint32_t dataSize) override;
void SetIndexBuffer(IBuffer* buffer) override;
void SetIndexBufferData(const void* data, const uint32_t dataSize) override;
void BeginPass() override;
void EndPass() override;
void Draw(const uint32_t firstVertex, const uint32_t vertexCount) override;
void DrawIndexed(
const uint32_t firstIndex, const uint32_t indexCount, const int32_t vertexOffset) override;
void DrawInstanced(
const uint32_t firstVertex, const uint32_t vertexCount,
const uint32_t firstInstance, const uint32_t instanceCount) override;
void DrawIndexedInstanced(
const uint32_t firstIndex, const uint32_t indexCount,
const uint32_t firstInstance, const uint32_t instanceCount,
const int32_t vertexOffset) override;
void DrawIndexedInRange(
const uint32_t firstIndex, const uint32_t indexCount,
const uint32_t start, const uint32_t end) override;
+ void BeginComputePass() override;
+ void EndComputePass() override;
+
+ void Dispatch(
+ const uint32_t groupCountX,
+ const uint32_t groupCountY,
+ const uint32_t groupCountZ) override;
+
void SetTexture(const int32_t bindingSlot, ITexture* texture) override;
+ void SetStorageTexture(const int32_t bindingSlot, ITexture* texture) override;
+
void SetUniform(
const int32_t bindingSlot,
const float value) override;
void SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY) override;
void SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY,
const float valueZ) override;
void SetUniform(
const int32_t bindingSlot,
const float valueX, const float valueY,
const float valueZ, const float valueW) override;
// NOTE(review): the element type of PS::span appears to be missing on the
// declaration below - confirm against the repository.
void SetUniform(
const int32_t bindingSlot, PS::span values) override;
void BeginScopedLabel(const char* name) override;
void EndScopedLabel() override;
void Flush() override;
private:
// CDevice is the only class allowed to call Create and the constructor.
friend class CDevice;
// NOTE(review): the template argument of std::unique_ptr appears to be
// missing on the next line - confirm against the repository.
static std::unique_ptr Create(CDevice* device);
CDeviceCommandContext();
// Non-owning pointer to the owning device, null by default.
CDevice* m_Device = nullptr;
};
} // namespace Dummy
} // namespace Backend
} // namespace Renderer
#endif // INCLUDED_RENDERER_BACKEND_DUMMY_DEVICECOMMANDCONTEXT
Index: ps/trunk/source/renderer/backend/gl/Device.cpp
===================================================================
--- ps/trunk/source/renderer/backend/gl/Device.cpp (revision 28009)
+++ ps/trunk/source/renderer/backend/gl/Device.cpp (revision 28010)
@@ -1,1074 +1,1080 @@
-/* Copyright (C) 2023 Wildfire Games.
+/* Copyright (C) 2024 Wildfire Games.
* This file is part of 0 A.D.
*
* 0 A.D. is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* 0 A.D. is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with 0 A.D. If not, see .
*/
#include "precompiled.h"
#include "Device.h"
#include "lib/external_libraries/libsdl.h"
#include "lib/hash.h"
#include "lib/ogl.h"
#include "ps/CLogger.h"
#include "ps/ConfigDB.h"
#include "ps/Profile.h"
#include "renderer/backend/gl/DeviceCommandContext.h"
#include "renderer/backend/gl/PipelineState.h"
#include "renderer/backend/gl/Texture.h"
#include "scriptinterface/JSON.h"
#include "scriptinterface/Object.h"
#include "scriptinterface/ScriptInterface.h"
#include "scriptinterface/ScriptRequest.h"
#if OS_WIN
// We can't include wutil directly because GL headers conflict with Windows
// until we use a proper GL loader.
extern void* wutil_GetAppHDC();
#endif
#include
#include
#include
#if !CONFIG2_GLES && (defined(SDL_VIDEO_DRIVER_X11) || defined(SDL_VIDEO_DRIVER_WAYLAND))
#if defined(SDL_VIDEO_DRIVER_X11)
#include
#endif
#if defined(SDL_VIDEO_DRIVER_WAYLAND)
#include
#endif
#include
#endif // !CONFIG2_GLES && (defined(SDL_VIDEO_DRIVER_X11) || defined(SDL_VIDEO_DRIVER_WAYLAND))
namespace Renderer
{
namespace Backend
{
namespace GL
{
namespace
{
/**
 * Builds a human-readable card name of the form "<vendor> <renderer>" from
 * the GL_VENDOR and GL_RENDERER strings, shortening a few known verbose
 * vendor prefixes.
 *
 * @return the card name, or an empty string when either GL string is
 *         unavailable (e.g. when called before GL initialization).
 */
std::string GetNameImpl()
{
	// GL_VENDOR+GL_RENDERER are good enough here, so we don't use WMI to detect the cards.
	// On top of that WMI can cause crashes with Nvidia Optimus and some netbooks
	// see http://trac.wildfiregames.com/ticket/1952
	// http://trac.wildfiregames.com/ticket/1575
	const char* vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
	const char* renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
	// Happens if called before GL initialization.
	if (!vendor || !renderer)
		return {};

	char cardName[128];
	sprintf_s(cardName, std::size(cardName), "%s %s", vendor, renderer);

	// Remove crap from vendor names. (don't dare touch the model name -
	// it's too risky, there are too many different strings).
	// If cardName starts with `what`, only its first charsToKeep characters
	// are kept and the rest of the string (including the terminating zero)
	// is moved right after them.
#define SHORTEN(what, charsToKeep) \
	if (!strncmp(cardName, what, std::size(what) - 1)) \
		memmove(cardName + charsToKeep, cardName + std::size(what) - 1, (strlen(cardName) - (std::size(what) - 1) + 1) * sizeof(char));
	SHORTEN("ATI Technologies Inc.", 3);
	SHORTEN("NVIDIA Corporation", 6);
	// The longer S3 prefix has to be tried first: "S3 Graphics" is a prefix
	// of it, so the opposite order would leave "S3, Incorporated ..." behind.
	SHORTEN("S3 Graphics, Incorporated", 2); // returned by GL_VENDOR
	SHORTEN("S3 Graphics", 2); // returned by EnumDisplayDevices
#undef SHORTEN

	return cardName;
}
/**
 * Returns the GL_VERSION string, or an empty string when it is unavailable.
 */
std::string GetVersionImpl()
{
	const char* version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
	// glGetString may return null (e.g. before GL initialization), and
	// constructing std::string from a null pointer is undefined behaviour.
	if (!version)
		return {};
	return version;
}
/**
 * Returns the driver information string, or an empty string when it is
 * unavailable.
 */
std::string GetDriverInformationImpl()
{
	// Usually GL_VERSION contains both OpenGL and driver versions.
	const char* version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
	// glGetString may return null (e.g. before GL initialization), and
	// constructing std::string from a null pointer is undefined behaviour.
	if (!version)
		return {};
	return version;
}
std::vector GetExtensionsImpl()
{
std::vector